Add files using upload-large-folder tool
Browse files- .gitattributes +35 -35
- course_feedback_nlp/Coursera_courses.csv +624 -0
- course_feedback_nlp/Untitled.ipynb +418 -0
- course_feedback_nlp/evaluate.py +548 -0
- course_feedback_nlp/requirements.txt +7 -0
- course_feedback_nlp/test.py +52 -0
- course_feedback_nlp/train.py +862 -0
- course_feedback_nlp/train_3_classes.py +872 -0
- dropout_binaryclass/correlation.py +218 -0
- dropout_binaryclass/data.csv +0 -0
- dropout_binaryclass/feature_importance.png +0 -0
- dropout_binaryclass/feature_selection_recommendations.txt +42 -0
- dropout_binaryclass/model_config.json +411 -0
- dropout_binaryclass/predict_students_dropout_and_academic_success_model.pkl +0 -0
- dropout_binaryclass/redundant_feature_pairs.csv +16 -0
- dropout_binaryclass/target_correlations.csv +37 -0
- dropout_binaryclass/train.ipynb +0 -0
- dropout_binaryclass/train.py +224 -0
- grade_multiclass/02_grade_distribution.png +0 -0
- grade_multiclass/03_performance_index_distribution.png +0 -0
- grade_multiclass/04_features_by_grade.png +0 -0
- grade_multiclass/05_extracurricular_analysis.png +0 -0
- grade_multiclass/06_correlation_heatmap.png +0 -0
- grade_multiclass/09_feature_importance.png +0 -0
- grade_multiclass/10_learning_curves.png +0 -0
- grade_multiclass/11_model_comparison.png +0 -0
- grade_multiclass/Student_Performance.csv +0 -0
- grade_multiclass/correlation_heatmap.png +0 -0
- grade_multiclass/feature_importance.png +0 -0
- grade_multiclass/features_by_grade.png +0 -0
- grade_multiclass/learning_curves.png +0 -0
- grade_multiclass/model_comparison.png +0 -0
- grade_multiclass/student_performance_classification.ipynb +0 -0
- grade_multiclass/student_performance_classification.py +1100 -0
- grade_multiclass/target_distribution.png +0 -0
- lr_attendance/2018-2019_Daily_Attendance_20240429.csv +0 -0
- lr_attendance/add_weather_features.py +195 -0
- lr_attendance/best_model_coefficients.csv +13 -0
- lr_attendance/explore_data.py +28 -0
- lr_attendance/feature_engineering.py +154 -0
- lr_attendance/feature_info.json +118 -0
- lr_attendance/final_coefficients.csv +13 -0
- lr_attendance/final_predictions.csv +0 -0
- lr_attendance/improved_predictions.csv +0 -0
- lr_attendance/model_comparison.csv +5 -0
- lr_attendance/model_summary.csv +2 -0
- lr_attendance/nyc_weather_2018_2019.csv +297 -0
- lr_attendance/prepare_for_modeling.py +215 -0
- lr_attendance/train.ipynb +1140 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
course_feedback_nlp/Coursera_courses.csv
ADDED
|
@@ -0,0 +1,624 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name,institution,course_url,course_id
|
| 2 |
+
Machine Learning,Stanford University,https://www.coursera.org/learn/machine-learning,machine-learning
|
| 3 |
+
Indigenous Canada,University of Alberta,https://www.coursera.org/learn/indigenous-canada,indigenous-canada
|
| 4 |
+
The Science of Well-Being,Yale University,https://www.coursera.org/learn/the-science-of-well-being,the-science-of-well-being
|
| 5 |
+
Technical Support Fundamentals,Google,https://www.coursera.org/learn/technical-support-fundamentals,technical-support-fundamentals
|
| 6 |
+
Become a CBRS Certified Professional Installer by Google,Google - Spectrum Sharing,https://www.coursera.org/learn/google-cbrs-cpi-training,google-cbrs-cpi-training
|
| 7 |
+
Financial Markets,Yale University,https://www.coursera.org/learn/financial-markets-global,financial-markets-global
|
| 8 |
+
Introduction to Psychology,Yale University,https://www.coursera.org/learn/introduction-psychology,introduction-psychology
|
| 9 |
+
Programming for Everybody (Getting Started with Python),University of Michigan,https://www.coursera.org/learn/python,python
|
| 10 |
+
The Bits and Bytes of Computer Networking,Google,https://www.coursera.org/learn/computer-networking,computer-networking
|
| 11 |
+
AI For Everyone,DeepLearning.AI,https://www.coursera.org/learn/ai-for-everyone,ai-for-everyone
|
| 12 |
+
Crash Course on Python,Google,https://www.coursera.org/learn/python-crash-course,python-crash-course
|
| 13 |
+
Psychological First Aid,Johns Hopkins University,https://www.coursera.org/learn/psychological-first-aid,psychological-first-aid
|
| 14 |
+
Neural Networks and Deep Learning,DeepLearning.AI,https://www.coursera.org/learn/neural-networks-deep-learning,neural-networks-deep-learning
|
| 15 |
+
What is Data Science?,IBM,https://www.coursera.org/learn/what-is-datascience,what-is-datascience
|
| 16 |
+
Successful Negotiation: Essential Strategies and Skills,University of Michigan,https://www.coursera.org/learn/negotiation-skills,negotiation-skills
|
| 17 |
+
Fundamentals of Project Planning and Management,University of Virginia,https://www.coursera.org/learn/uva-darden-project-management,uva-darden-project-management
|
| 18 |
+
Project Launch,"University of California, Irvine",https://www.coursera.org/learn/project-management,project-management
|
| 19 |
+
"Brand Management: Aligning Business, Brand and Behaviour",London Business School,https://www.coursera.org/learn/brand,brand
|
| 20 |
+
Writing in the Sciences,Stanford University,https://www.coursera.org/learn/sciwrite,sciwrite
|
| 21 |
+
Stanford Introduction to Food and Health,Stanford University,https://www.coursera.org/learn/food-and-health,food-and-health
|
| 22 |
+
"HTML, CSS, and Javascript for Web Developers",Johns Hopkins University,https://www.coursera.org/learn/html-css-javascript-for-web-developers,html-css-javascript-for-web-developers
|
| 23 |
+
Excel Skills for Business: Essentials,Macquarie University,https://www.coursera.org/learn/excel-essentials,excel-essentials
|
| 24 |
+
Introduction to Negotiation: A Strategic Playbook for Becoming a Principled and Persuasive Negotiator,Yale University,https://www.coursera.org/learn/negotiation,negotiation
|
| 25 |
+
"Everyday Excel, Part 1",University of Colorado Boulder,https://www.coursera.org/learn/everyday-excel-part-1,everyday-excel-part-1
|
| 26 |
+
Learning How to Learn: Powerful mental tools to help you master tough subjects,University of California San Diego,https://www.coursera.org/learn/learning-how-to-learn,learning-how-to-learn
|
| 27 |
+
Google Cloud Platform Fundamentals: Core Infrastructure,Google Cloud,https://www.coursera.org/learn/gcp-fundamentals,gcp-fundamentals
|
| 28 |
+
Viral Marketing and How to Craft Contagious Content,University of Pennsylvania,https://www.coursera.org/learn/wharton-contagious-viral-marketing,wharton-contagious-viral-marketing
|
| 29 |
+
Python Data Structures,University of Michigan,https://www.coursera.org/learn/python-data,python-data
|
| 30 |
+
Private Equity and Venture Capital,Università Bocconi,https://www.coursera.org/learn/private-equity,private-equity
|
| 31 |
+
First Step Korean,Yonsei University,https://www.coursera.org/learn/learn-korean,learn-korean
|
| 32 |
+
"Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning",DeepLearning.AI,https://www.coursera.org/learn/introduction-tensorflow,introduction-tensorflow
|
| 33 |
+
Operating Systems and You: Becoming a Power User,Google,https://www.coursera.org/learn/os-power-user,os-power-user
|
| 34 |
+
Tools for Data Science,IBM,https://www.coursera.org/learn/open-source-tools-for-data-science,open-source-tools-for-data-science
|
| 35 |
+
"Improving Deep Neural Networks: Hyperparameter tuning, Regularization and Optimization",DeepLearning.AI,https://www.coursera.org/learn/deep-neural-network,deep-neural-network
|
| 36 |
+
Diversity and inclusion in the workplace,ESSEC Business School,https://www.coursera.org/learn/diversity-inclusion-workplace,diversity-inclusion-workplace
|
| 37 |
+
Design and Interpretation of Clinical Trials,Johns Hopkins University,https://www.coursera.org/learn/clinical-trials,clinical-trials
|
| 38 |
+
Visual Elements of User Interface Design,California Institute of the Arts,https://www.coursera.org/learn/visual-elements-user-interface-design,visual-elements-user-interface-design
|
| 39 |
+
Management of Fashion and Luxury Companies,Università Bocconi,https://www.coursera.org/learn/mafash,mafash
|
| 40 |
+
Primeros Auxilios Psicológicos (PAP),Universitat Autònoma de Barcelona,https://www.coursera.org/learn/pap,pap
|
| 41 |
+
Social Psychology,Wesleyan University,https://www.coursera.org/learn/social-psychology,social-psychology
|
| 42 |
+
Initiating and Planning Projects,"University of California, Irvine",https://www.coursera.org/learn/project-planning,project-planning
|
| 43 |
+
Computational Thinking for Problem Solving,University of Pennsylvania,https://www.coursera.org/learn/computational-thinking-problem-solving,computational-thinking-problem-solving
|
| 44 |
+
Agile with Atlassian Jira,Atlassian,https://www.coursera.org/learn/agile-atlassian-jira,agile-atlassian-jira
|
| 45 |
+
Fundamentals of Graphic Design,California Institute of the Arts,https://www.coursera.org/learn/fundamentals-of-graphic-design,fundamentals-of-graphic-design
|
| 46 |
+
Introduction to User Experience Design,Georgia Institute of Technology,https://www.coursera.org/learn/user-experience-design,user-experience-design
|
| 47 |
+
Introduction to Marketing,University of Pennsylvania,https://www.coursera.org/learn/wharton-marketing,wharton-marketing
|
| 48 |
+
Python for Data Science and AI,IBM,https://www.coursera.org/learn/python-for-applied-data-science-ai,python-for-applied-data-science-ai
|
| 49 |
+
Marketing Analytics,University of Virginia,https://www.coursera.org/learn/uva-darden-market-analytics,uva-darden-market-analytics
|
| 50 |
+
Natural Language Processing with Classification and Vector Spaces,DeepLearning.AI,https://www.coursera.org/learn/classification-vector-spaces-in-nlp,classification-vector-spaces-in-nlp
|
| 51 |
+
Fundamentals of Quantitative Modeling,University of Pennsylvania,https://www.coursera.org/learn/wharton-quantitative-modeling,wharton-quantitative-modeling
|
| 52 |
+
How to Manage a Remote Team,GitLab,https://www.coursera.org/learn/remote-team-management,remote-team-management
|
| 53 |
+
Mathematics for Machine Learning: Linear Algebra,Imperial College London,https://www.coursera.org/learn/linear-algebra-machine-learning,linear-algebra-machine-learning
|
| 54 |
+
Introduction to Data Science in Python,University of Michigan,https://www.coursera.org/learn/python-data-analysis,python-data-analysis
|
| 55 |
+
Customer Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-customer-analytics,wharton-customer-analytics
|
| 56 |
+
Introduction to Psychology,University of Toronto,https://www.coursera.org/learn/introduction-psych,introduction-psych
|
| 57 |
+
English for Career Development,University of Pennsylvania,https://www.coursera.org/learn/careerdevelopment,careerdevelopment
|
| 58 |
+
Global Diplomacy – Diplomacy in the Modern World,University of London,https://www.coursera.org/learn/global-diplomacy,global-diplomacy
|
| 59 |
+
Game Theory,Stanford University,https://www.coursera.org/learn/game-theory-1,game-theory-1
|
| 60 |
+
SQL for Data Science,"University of California, Davis",https://www.coursera.org/learn/sql-for-data-science,sql-for-data-science
|
| 61 |
+
Write Professional Emails in English,Georgia Institute of Technology,https://www.coursera.org/learn/professional-emails-english,professional-emails-english
|
| 62 |
+
Medical Neuroscience,Duke University,https://www.coursera.org/learn/medical-neuroscience,medical-neuroscience
|
| 63 |
+
System Administration and IT Infrastructure Services,Google,https://www.coursera.org/learn/system-administration-it-infrastructure-services,system-administration-it-infrastructure-services
|
| 64 |
+
International Women's Health and Human Rights,Stanford University,https://www.coursera.org/learn/womens-health-human-rights,womens-health-human-rights
|
| 65 |
+
Child Nutrition and Cooking,Stanford University,https://www.coursera.org/learn/childnutrition,childnutrition
|
| 66 |
+
Understanding the Brain: The Neurobiology of Everyday Life,The University of Chicago,https://www.coursera.org/learn/neurobiology,neurobiology
|
| 67 |
+
Introduction to Social Media Marketing,Facebook,https://www.coursera.org/learn/social-media-marketing-introduction,social-media-marketing-introduction
|
| 68 |
+
Forensic Accounting and Fraud Examination,West Virginia University,https://www.coursera.org/learn/forensic-accounting,forensic-accounting
|
| 69 |
+
Clinical Terminology for International and U.S. Students,University of Pittsburgh,https://www.coursera.org/learn/clinical-terminology,clinical-terminology
|
| 70 |
+
Science of Exercise,University of Colorado Boulder,https://www.coursera.org/learn/science-exercise,science-exercise
|
| 71 |
+
Digital Product Management: Modern Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-digital-product-management,uva-darden-digital-product-management
|
| 72 |
+
Data Science Math Skills,Duke University,https://www.coursera.org/learn/datasciencemathskills,datasciencemathskills
|
| 73 |
+
Structuring Machine Learning Projects,DeepLearning.AI,https://www.coursera.org/learn/machine-learning-projects,machine-learning-projects
|
| 74 |
+
An Introduction to American Law,University of Pennsylvania,https://www.coursera.org/learn/american-law,american-law
|
| 75 |
+
The Strategy of Content Marketing,"University of California, Davis",https://www.coursera.org/learn/content-marketing,content-marketing
|
| 76 |
+
Introduction to Cybersecurity Tools & Cyber Attacks,IBM,https://www.coursera.org/learn/introduction-cybersecurity-cyber-attacks,introduction-cybersecurity-cyber-attacks
|
| 77 |
+
The Data Scientist’s Toolbox,Johns Hopkins University,https://www.coursera.org/learn/data-scientists-tools,data-scientists-tools
|
| 78 |
+
Animal Behaviour and Welfare,The University of Edinburgh,https://www.coursera.org/learn/animal-welfare,animal-welfare
|
| 79 |
+
Convolutional Neural Networks in TensorFlow,DeepLearning.AI,https://www.coursera.org/learn/convolutional-neural-networks-tensorflow,convolutional-neural-networks-tensorflow
|
| 80 |
+
Positive Psychology: Martin E. P. Seligman’s Visionary Science,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-visionary-science,positive-psychology-visionary-science
|
| 81 |
+
Introduction to the Biology of Cancer,Johns Hopkins University,https://www.coursera.org/learn/cancer,cancer
|
| 82 |
+
Convolutional Neural Networks,DeepLearning.AI,https://www.coursera.org/learn/convolutional-neural-networks,convolutional-neural-networks
|
| 83 |
+
Using Python to Access Web Data,University of Michigan,https://www.coursera.org/learn/python-network-data,python-network-data
|
| 84 |
+
Introductory Human Physiology,Duke University,https://www.coursera.org/learn/physiology,physiology
|
| 85 |
+
Introduction to Systematic Review and Meta-Analysis,Johns Hopkins University,https://www.coursera.org/learn/systematic-review,systematic-review
|
| 86 |
+
Organizational Analysis,Stanford University,https://www.coursera.org/learn/organizational-analysis,organizational-analysis
|
| 87 |
+
Communication Strategies for a Virtual Age,University of Toronto,https://www.coursera.org/learn/communication-strategies-virtual-age,communication-strategies-virtual-age
|
| 88 |
+
Moral Foundations of Politics,Yale University,https://www.coursera.org/learn/moral-politics,moral-politics
|
| 89 |
+
Étudier en France: French Intermediate course B1-B2,École Polytechnique,https://www.coursera.org/learn/etudier-en-france,etudier-en-france
|
| 90 |
+
Managing the Company of the Future,London Business School,https://www.coursera.org/learn/company-future-management,company-future-management
|
| 91 |
+
Finance for Non-Finance Professionals,Rice University,https://www.coursera.org/learn/finance-for-non-finance,finance-for-non-finance
|
| 92 |
+
Site Reliability Engineering: Measuring and Managing Reliability,Google Cloud,https://www.coursera.org/learn/site-reliability-engineering-slos,site-reliability-engineering-slos
|
| 93 |
+
Autism Spectrum Disorder,"University of California, Davis",https://www.coursera.org/learn/autism-spectrum-disorder,autism-spectrum-disorder
|
| 94 |
+
Data Science Methodology,IBM,https://www.coursera.org/learn/data-science-methodology,data-science-methodology
|
| 95 |
+
Introduction to Financial Accounting,University of Pennsylvania,https://www.coursera.org/learn/wharton-accounting,wharton-accounting
|
| 96 |
+
Marketing in a Digital World,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-digital,marketing-digital
|
| 97 |
+
Wind Energy,Technical University of Denmark (DTU),https://www.coursera.org/learn/wind-energy,wind-energy
|
| 98 |
+
Principles of Sustainable Finance,Erasmus University Rotterdam,https://www.coursera.org/learn/sustainable-finance,sustainable-finance
|
| 99 |
+
Financial Engineering and Risk Management Part I,Columbia University,https://www.coursera.org/learn/financial-engineering-1,financial-engineering-1
|
| 100 |
+
Introduction to Philosophy,The University of Edinburgh,https://www.coursera.org/learn/philosophy,philosophy
|
| 101 |
+
Business Metrics for Data-Driven Companies,Duke University,https://www.coursera.org/learn/analytics-business-metrics,analytics-business-metrics
|
| 102 |
+
Python Basics,University of Michigan,https://www.coursera.org/learn/python-basics,python-basics
|
| 103 |
+
Introduction to Sustainability,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/sustainability,sustainability
|
| 104 |
+
Positive Psychiatry and Mental Health,The University of Sydney,https://www.coursera.org/learn/positive-psychiatry,positive-psychiatry
|
| 105 |
+
Cryptography I,Stanford University,https://www.coursera.org/learn/crypto,crypto
|
| 106 |
+
Learning to Teach Online,UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/teach-online,teach-online
|
| 107 |
+
IT Security: Defense against the digital dark arts,Google,https://www.coursera.org/learn/it-security,it-security
|
| 108 |
+
Entreprise et changement climatique,ESSEC Business School,https://www.coursera.org/learn/entreprise-changement-climatique,entreprise-changement-climatique
|
| 109 |
+
An Introduction to Consumer Neuroscience & Neuromarketing,Copenhagen Business School,https://www.coursera.org/learn/neuromarketing,neuromarketing
|
| 110 |
+
Gamification,University of Pennsylvania,https://www.coursera.org/learn/gamification,gamification
|
| 111 |
+
"Divide and Conquer, Sorting and Searching, and Randomized Algorithms",Stanford University,https://www.coursera.org/learn/algorithms-divide-conquer,algorithms-divide-conquer
|
| 112 |
+
Contabilidad para no contadores,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/contabilidad,contabilidad
|
| 113 |
+
Using Python to Interact with the Operating System,Google,https://www.coursera.org/learn/python-operating-system,python-operating-system
|
| 114 |
+
Object-Oriented Data Structures in C++,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/cs-fundamentals-1,cs-fundamentals-1
|
| 115 |
+
Google Cloud Platform Big Data and Machine Learning Fundamentals,Google Cloud,https://www.coursera.org/learn/gcp-big-data-ml-fundamentals,gcp-big-data-ml-fundamentals
|
| 116 |
+
Databases and SQL for Data Science,IBM,https://www.coursera.org/learn/sql-data-science,sql-data-science
|
| 117 |
+
Natural Language Processing in TensorFlow,DeepLearning.AI,https://www.coursera.org/learn/natural-language-processing-tensorflow,natural-language-processing-tensorflow
|
| 118 |
+
"Advanced Valuation and Strategy - M&A, Private Equity, and Venture Capital",Erasmus University Rotterdam,https://www.coursera.org/learn/advanced-valuation-and-strategy,advanced-valuation-and-strategy
|
| 119 |
+
Natural Language Processing with Probabilistic Models,DeepLearning.AI,https://www.coursera.org/learn/probabilistic-models-in-nlp,probabilistic-models-in-nlp
|
| 120 |
+
Vital Signs: Understanding What the Body Is Telling Us,University of Pennsylvania,https://www.coursera.org/learn/vital-signs,vital-signs
|
| 121 |
+
Understanding Research Methods,University of London,https://www.coursera.org/learn/research-methods,research-methods
|
| 122 |
+
IBM Customer Engagement Specialist Professional Certificate,IBM,https://www.coursera.org/learn/ibm-customer-engagement-specialist,ibm-customer-engagement-specialist
|
| 123 |
+
Introduction to Calculus,The University of Sydney,https://www.coursera.org/learn/introduction-to-calculus,introduction-to-calculus
|
| 124 |
+
Camino a la Excelencia en Gestión de Proyectos,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/camino-excelencia-gestion-proyectos,camino-excelencia-gestion-proyectos
|
| 125 |
+
Introduction to HTML5,University of Michigan,https://www.coursera.org/learn/html,html
|
| 126 |
+
Wine Tasting: Sensory Techniques for Wine Analysis,"University of California, Davis",https://www.coursera.org/learn/wine,wine
|
| 127 |
+
Excel Skills for Business: Intermediate I,Macquarie University,https://www.coursera.org/learn/excel-intermediate-1,excel-intermediate-1
|
| 128 |
+
"Programming Foundations with JavaScript, HTML and CSS",Duke University,https://www.coursera.org/learn/duke-programming-web,duke-programming-web
|
| 129 |
+
Build a Modern Computer from First Principles: From Nand to Tetris (Project-Centered Course),Hebrew University of Jerusalem,https://www.coursera.org/learn/build-a-computer,build-a-computer
|
| 130 |
+
Food & Beverage Management,Università Bocconi,https://www.coursera.org/learn/food-beverage-management,food-beverage-management
|
| 131 |
+
Data Analysis with Python,IBM,https://www.coursera.org/learn/data-analysis-with-python,data-analysis-with-python
|
| 132 |
+
Project Planning,"University of California, Irvine",https://www.coursera.org/learn/project-planning-1,project-planning-1
|
| 133 |
+
Agile Meets Design Thinking,University of Virginia,https://www.coursera.org/learn/uva-darden-getting-started-agile,uva-darden-getting-started-agile
|
| 134 |
+
AWS Fundamentals: Going Cloud-Native,Amazon Web Services,https://www.coursera.org/learn/aws-fundamentals-going-cloud-native,aws-fundamentals-going-cloud-native
|
| 135 |
+
Construction Project Management,Columbia University,https://www.coursera.org/learn/construction-project-management,construction-project-management
|
| 136 |
+
Introduction to Mathematical Thinking,Stanford University,https://www.coursera.org/learn/mathematical-thinking,mathematical-thinking
|
| 137 |
+
Everyday Parenting: The ABCs of Child Rearing,Yale University,https://www.coursera.org/learn/everyday-parenting,everyday-parenting
|
| 138 |
+
Introduction to Healthcare,Stanford University,https://www.coursera.org/learn/intro-to-healthcare,intro-to-healthcare
|
| 139 |
+
Machine Learning with Python,IBM,https://www.coursera.org/learn/machine-learning-with-python,machine-learning-with-python
|
| 140 |
+
Terrorism and Counterterrorism: Comparing Theory and Practice,Universiteit Leiden,https://www.coursera.org/learn/terrorism,terrorism
|
| 141 |
+
Data Management for Clinical Research,Vanderbilt University,https://www.coursera.org/learn/clinical-data-management,clinical-data-management
|
| 142 |
+
Sustainable Fashion,Copenhagen Business School,https://www.coursera.org/learn/sustainable-fashion,sustainable-fashion
|
| 143 |
+
Foundations of Data Science: K-Means Clustering in Python,University of London,https://www.coursera.org/learn/data-science-k-means-clustering-python,data-science-k-means-clustering-python
|
| 144 |
+
Instructional Design Foundations and Applications,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/instructional-design-foundations-applications,instructional-design-foundations-applications
|
| 145 |
+
Cursos en línea: modelo para armar,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/enlinea,enlinea
|
| 146 |
+
Modern Art & Ideas,The Museum of Modern Art,https://www.coursera.org/learn/modern-art-ideas,modern-art-ideas
|
| 147 |
+
"Speak English Professionally: In Person, Online & On the Phone",Georgia Institute of Technology,https://www.coursera.org/learn/speak-english-professionally,speak-english-professionally
|
| 148 |
+
Essential Google Cloud Infrastructure: Foundation,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-foundation,gcp-infrastructure-foundation
|
| 149 |
+
Introduction to Artificial Intelligence (AI),IBM,https://www.coursera.org/learn/introduction-to-ai,introduction-to-ai
|
| 150 |
+
Dog Emotion and Cognition,Duke University,https://www.coursera.org/learn/dog-emotion-and-cognition,dog-emotion-and-cognition
|
| 151 |
+
International Leadership and Organizational Behavior,Università Bocconi,https://www.coursera.org/learn/organizational-behavior,organizational-behavior
|
| 152 |
+
Driving business towards the Sustainable Development Goals,Erasmus University Rotterdam,https://www.coursera.org/learn/sdgbusiness,sdgbusiness
|
| 153 |
+
"The Sustainable Development Goals – A global, transdisciplinary vision for the future",University of Copenhagen,https://www.coursera.org/learn/global-sustainable-development,global-sustainable-development
|
| 154 |
+
Digital Transformation,BCG,https://www.coursera.org/learn/bcg-uva-darden-digital-transformation,bcg-uva-darden-digital-transformation
|
| 155 |
+
Sequence Models,DeepLearning.AI,https://www.coursera.org/learn/nlp-sequence-models,nlp-sequence-models
|
| 156 |
+
Devenir entrepreneur du changement,HEC Paris,https://www.coursera.org/learn/entrepreneur-changement,entrepreneur-changement
|
| 157 |
+
Seeing Through Photographs,The Museum of Modern Art,https://www.coursera.org/learn/photography,photography
|
| 158 |
+
Entrepreneurship 1: Developing the Opportunity,University of Pennsylvania,https://www.coursera.org/learn/wharton-entrepreneurship-opportunity,wharton-entrepreneurship-opportunity
|
| 159 |
+
Introduction to Search Engine Optimization,"University of California, Davis",https://www.coursera.org/learn/search-engine-optimization,search-engine-optimization
|
| 160 |
+
Learn to Speak Korean 1,Yonsei University,https://www.coursera.org/learn/learn-speak-korean1,learn-speak-korean1
|
| 161 |
+
Circular Economy - Sustainable Materials Management,Delft University of Technology,https://www.coursera.org/learn/circular-economy,circular-economy
|
| 162 |
+
Drug Development,University of California San Diego,https://www.coursera.org/learn/drug-development,drug-development
|
| 163 |
+
R Programming,Johns Hopkins University,https://www.coursera.org/learn/r-programming,r-programming
|
| 164 |
+
Economics of Money and Banking,Columbia University,https://www.coursera.org/learn/money-banking,money-banking
|
| 165 |
+
Chinese for Beginners,Peking University,https://www.coursera.org/learn/learn-chinese,learn-chinese
|
| 166 |
+
Grammar and Punctuation,"University of California, Irvine",https://www.coursera.org/learn/grammar-punctuation,grammar-punctuation
|
| 167 |
+
Japanese for beginners 1,Saint Petersburg State University,https://www.coursera.org/learn/japanese-1,japanese-1
|
| 168 |
+
Introduction to English Common Law,University of London,https://www.coursera.org/learn/intro-common-law,intro-common-law
|
| 169 |
+
Introduction to Dental Medicine,University of Pennsylvania,https://www.coursera.org/learn/dental-medicine-penn,dental-medicine-penn
|
| 170 |
+
Fundamentals of Reinforcement Learning,Alberta Machine Intelligence Institute,https://www.coursera.org/learn/fundamentals-of-reinforcement-learning,fundamentals-of-reinforcement-learning
|
| 171 |
+
The Power of Macroeconomics: Economic Principles in the Real World,"University of California, Irvine",https://www.coursera.org/learn/principles-of-macroeconomics,principles-of-macroeconomics
|
| 172 |
+
Corporate Sustainability. Understanding and Seizing the Strategic Opportunity,Università Bocconi,https://www.coursera.org/learn/corp-sustainability,corp-sustainability
|
| 173 |
+
Behavioral Finance,Duke University,https://www.coursera.org/learn/duke-behavioral-finance,duke-behavioral-finance
|
| 174 |
+
"Sequences, Time Series and Prediction",DeepLearning.AI,https://www.coursera.org/learn/tensorflow-sequences-time-series-and-prediction,tensorflow-sequences-time-series-and-prediction
|
| 175 |
+
Supply Chain Logistics,Rutgers the State University of New Jersey,https://www.coursera.org/learn/supply-chain-logistics,supply-chain-logistics
|
| 176 |
+
Project Execution,"University of California, Irvine",https://www.coursera.org/learn/project-execution,project-execution
|
| 177 |
+
Nutrición y obesidad: control de sobrepeso,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/nutricion-obesidad-sobrepeso,nutricion-obesidad-sobrepeso
|
| 178 |
+
Microeconomics Principles,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/microeconomics,microeconomics
|
| 179 |
+
Creative Writing: The Craft of Plot,Wesleyan University,https://www.coursera.org/learn/craft-of-plot,craft-of-plot
|
| 180 |
+
Astronomy: Exploring Time and Space,University of Arizona,https://www.coursera.org/learn/astro,astro
|
| 181 |
+
Oil & Gas Industry Operations and Markets,Duke University,https://www.coursera.org/learn/oilandgas,oilandgas
|
| 182 |
+
Design Thinking for Innovation,University of Virginia,https://www.coursera.org/learn/uva-darden-design-thinking-innovation,uva-darden-design-thinking-innovation
|
| 183 |
+
EDIVET: Do you have what it takes to be a veterinarian?,The University of Edinburgh,https://www.coursera.org/learn/becoming-a-veterinarian,becoming-a-veterinarian
|
| 184 |
+
Learn to Program: The Fundamentals,University of Toronto,https://www.coursera.org/learn/learn-to-program,learn-to-program
|
| 185 |
+
Financial Accounting Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-financial-accounting,uva-darden-financial-accounting
|
| 186 |
+
Finding Purpose and Meaning In Life: Living for What Matters Most,University of Michigan,https://www.coursera.org/learn/finding-purpose-and-meaning-in-life,finding-purpose-and-meaning-in-life
|
| 187 |
+
Understanding Clinical Research: Behind the Statistics,University of Cape Town,https://www.coursera.org/learn/clinical-research,clinical-research
|
| 188 |
+
Epidemiology: The Basic Science of Public Health,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/epidemiology,epidemiology
|
| 189 |
+
Fashion as Design,The Museum of Modern Art,https://www.coursera.org/learn/fashion-design,fashion-design
|
| 190 |
+
Teamwork Skills: Communicating Effectively in Groups,University of Colorado Boulder,https://www.coursera.org/learn/teamwork-skills-effective-communication,teamwork-skills-effective-communication
|
| 191 |
+
Feminism and Social Justice,"University of California, Santa Cruz",https://www.coursera.org/learn/feminism-social-justice,feminism-social-justice
|
| 192 |
+
International Organizations Management,University of Geneva,https://www.coursera.org/learn/international-organizations-management,international-organizations-management
|
| 193 |
+
Marketing Digital,Universidade de São Paulo,https://www.coursera.org/learn/estrategia-marketing-digital,estrategia-marketing-digital
|
| 194 |
+
Fundamentals of GIS,"University of California, Davis",https://www.coursera.org/learn/gis,gis
|
| 195 |
+
e-Learning Ecologies: Innovative Approaches to Teaching and Learning for the Digital Age,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/elearning,elearning
|
| 196 |
+
"Excel/VBA for Creative Problem Solving, Part 1",University of Colorado Boulder,https://www.coursera.org/learn/excel-vba-for-creative-problem-solving-part-1,excel-vba-for-creative-problem-solving-part-1
|
| 197 |
+
Rethinking International Tax Law,Universiteit Leiden,https://www.coursera.org/learn/international-taxation,international-taxation
|
| 198 |
+
Introduction to Probability and Data with R,Duke University,https://www.coursera.org/learn/probability-intro,probability-intro
|
| 199 |
+
Understanding and Visualizing Data with Python,University of Michigan,https://www.coursera.org/learn/understanding-visualization-data,understanding-visualization-data
|
| 200 |
+
Fundamentals of Visualization with Tableau,"University of California, Davis",https://www.coursera.org/learn/data-visualization-tableau,data-visualization-tableau
|
| 201 |
+
Getting Started with SAS Programming,SAS,https://www.coursera.org/learn/sas-programming-basics,sas-programming-basics
|
| 202 |
+
Machine Learning for All,University of London,https://www.coursera.org/learn/uol-machine-learning-for-all,uol-machine-learning-for-all
|
| 203 |
+
Using Databases with Python,University of Michigan,https://www.coursera.org/learn/python-databases,python-databases
|
| 204 |
+
Addiction Treatment: Clinical Skills for Healthcare Providers,Yale University,https://www.coursera.org/learn/addiction-treatment,addiction-treatment
|
| 205 |
+
Dino 101: Dinosaur Paleobiology,University of Alberta,https://www.coursera.org/learn/dino101,dino101
|
| 206 |
+
Sports Marketing,Northwestern University,https://www.coursera.org/learn/sports-marketing,sports-marketing
|
| 207 |
+
Positive Psychology,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/positive-psychology,positive-psychology
|
| 208 |
+
Introduction to Programming with MATLAB,Vanderbilt University,https://www.coursera.org/learn/matlab,matlab
|
| 209 |
+
Preparing to Manage Human Resources,University of Minnesota,https://www.coursera.org/learn/managing-human-resources,managing-human-resources
|
| 210 |
+
Solar Energy Basics,The State University of New York,https://www.coursera.org/learn/solar-energy-basics,solar-energy-basics
|
| 211 |
+
Front-End Web UI Frameworks and Tools: Bootstrap 4,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/bootstrap-4,bootstrap-4
|
| 212 |
+
Building Scalable Java Microservices with Spring Boot and Spring Cloud,Google Cloud,https://www.coursera.org/learn/google-cloud-java-spring,google-cloud-java-spring
|
| 213 |
+
Introduction to Forensic Science,"Nanyang Technological University, Singapore",https://www.coursera.org/learn/forensic-science,forensic-science
|
| 214 |
+
Google Cloud Product Fundamentals,Google Cloud,https://www.coursera.org/learn/google-cloud-product-fundamentals,google-cloud-product-fundamentals
|
| 215 |
+
American Contract Law I,Yale University,https://www.coursera.org/learn/contracts-1,contracts-1
|
| 216 |
+
Engineering Health: Introduction to Yoga and Physiology,New York University,https://www.coursera.org/learn/engineering-health-yoga-physiology,engineering-health-yoga-physiology
|
| 217 |
+
AI for Medical Diagnosis,DeepLearning.AI,https://www.coursera.org/learn/ai-for-medical-diagnosis,ai-for-medical-diagnosis
|
| 218 |
+
Natural Language Processing with Sequence Models,DeepLearning.AI,https://www.coursera.org/learn/sequence-models-in-nlp,sequence-models-in-nlp
|
| 219 |
+
Introduction to Electronics,Georgia Institute of Technology,https://www.coursera.org/learn/electronics,electronics
|
| 220 |
+
International Humanitarian Law in Theory and Practice,Universiteit Leiden,https://www.coursera.org/learn/international-humanitarian-law,international-humanitarian-law
|
| 221 |
+
Making Architecture,IE School of Architecture & Design,https://www.coursera.org/learn/making-architecture,making-architecture
|
| 222 |
+
Model Thinking,University of Michigan,https://www.coursera.org/learn/model-thinking,model-thinking
|
| 223 |
+
Supporting children with difficulties in reading and writing,University of London,https://www.coursera.org/learn/dyslexia-difficulties,dyslexia-difficulties
|
| 224 |
+
Innovation Management,Erasmus University Rotterdam,https://www.coursera.org/learn/innovation-management,innovation-management
|
| 225 |
+
The Manager's Toolkit: A Practical Guide to Managing People at Work,"Birkbeck, University of London",https://www.coursera.org/learn/people-management,people-management
|
| 226 |
+
"The Modern World, Part One: Global History from 1760 to 1910",University of Virginia,https://www.coursera.org/learn/modern-world,modern-world
|
| 227 |
+
Fundamentals of Music Theory,The University of Edinburgh,https://www.coursera.org/learn/edinburgh-music-theory,edinburgh-music-theory
|
| 228 |
+
Supply Chain Principles,Georgia Institute of Technology,https://www.coursera.org/learn/supply-chain-principles,supply-chain-principles
|
| 229 |
+
Essential Google Cloud Infrastructure: Core Services,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-core-services,gcp-infrastructure-core-services
|
| 230 |
+
Weight Management: Beyond Balancing Calories,Emory University,https://www.coursera.org/learn/weight-management-beyond-balancing-calories,weight-management-beyond-balancing-calories
|
| 231 |
+
Miracles of Human Language: An Introduction to Linguistics,Universiteit Leiden,https://www.coursera.org/learn/human-language,human-language
|
| 232 |
+
Java Programming: Solving Problems with Software,Duke University,https://www.coursera.org/learn/java-programming,java-programming
|
| 233 |
+
Race and Cultural Diversity in American Life and History,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/race-cultural-diversity-american-life,race-cultural-diversity-american-life
|
| 234 |
+
Inspiring and Motivating Individuals,University of Michigan,https://www.coursera.org/learn/motivate-people-teams,motivate-people-teams
|
| 235 |
+
"Competencias digitales. Herramientas de ofimática (Microsoft Word, Excel, Power Point)",Universitat Autònoma de Barcelona,https://www.coursera.org/learn/competencias-digitales-ofimatica,competencias-digitales-ofimatica
|
| 236 |
+
Healing with the Arts,University of Florida,https://www.coursera.org/learn/healing-with-the-arts,healing-with-the-arts
|
| 237 |
+
People Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-people-analytics,wharton-people-analytics
|
| 238 |
+
What is Social?,Northwestern University,https://www.coursera.org/learn/what-is-social,what-is-social
|
| 239 |
+
UX Design Fundamentals,California Institute of the Arts,https://www.coursera.org/learn/ux-design-fundamentals,ux-design-fundamentals
|
| 240 |
+
Creative Thinking: Techniques and Tools for Success,Imperial College London,https://www.coursera.org/learn/creative-thinking-techniques-and-tools-for-success,creative-thinking-techniques-and-tools-for-success
|
| 241 |
+
Introduction to Classical Music,Yale University,https://www.coursera.org/learn/introclassicalmusic,introclassicalmusic
|
| 242 |
+
Children's Human Rights - An Interdisciplinary Introduction,University of Geneva,https://www.coursera.org/learn/childrens-rights,childrens-rights
|
| 243 |
+
Investment Management in an Evolving and Volatile World by HEC Paris and AXA Investment Managers,HEC Paris,https://www.coursera.org/learn/investment-management,investment-management
|
| 244 |
+
Introduction to Data Analysis Using Excel,Rice University,https://www.coursera.org/learn/excel-data-analysis,excel-data-analysis
|
| 245 |
+
Mind Control: Managing Your Mental Health During COVID-19,University of Toronto,https://www.coursera.org/learn/manage-health-covid-19,manage-health-covid-19
|
| 246 |
+
Introduction to International Criminal Law,Case Western Reserve University,https://www.coursera.org/learn/international-criminal-law,international-criminal-law
|
| 247 |
+
"FinTech: Foundations, Payments, and Regulations",University of Pennsylvania,https://www.coursera.org/learn/wharton-fintech-overview-payments-regulations,wharton-fintech-overview-payments-regulations
|
| 248 |
+
Greek and Roman Mythology,University of Pennsylvania,https://www.coursera.org/learn/mythology,mythology
|
| 249 |
+
Politics and Economics of International Energy,Sciences Po,https://www.coursera.org/learn/global-energy,global-energy
|
| 250 |
+
Continuous Delivery & DevOps,University of Virginia,https://www.coursera.org/learn/uva-darden-continous-delivery-devops,uva-darden-continous-delivery-devops
|
| 251 |
+
Teach English Now! Foundational Principles,Arizona State University,https://www.coursera.org/learn/english-principles,english-principles
|
| 252 |
+
Business Model Innovation,HEC Paris,https://www.coursera.org/learn/business-model,business-model
|
| 253 |
+
Introduction to User Experience Principles and Processes,University of Michigan,https://www.coursera.org/learn/introtoux-principles-and-processes,introtoux-principles-and-processes
|
| 254 |
+
Beyond the Sustainable Development Goals (SDGs): Addressing Sustainability and Development,University of Michigan,https://www.coursera.org/learn/beyond-the-sustainable-development-goals-addressing-sustainability-and-development,beyond-the-sustainable-development-goals-addressing-sustainability-and-development
|
| 255 |
+
Process Mining: Data science in Action,Eindhoven University of Technology,https://www.coursera.org/learn/process-mining,process-mining
|
| 256 |
+
Fundamentals of Immunology: Innate Immunity and B-Cell Function,Rice University,https://www.coursera.org/learn/immunologyfundamentalsimmunitybcells,immunologyfundamentalsimmunitybcells
|
| 257 |
+
Introduction to Corporate Finance,University of Pennsylvania,https://www.coursera.org/learn/wharton-finance,wharton-finance
|
| 258 |
+
Global Diplomacy: the United Nations in the World,University of London,https://www.coursera.org/learn/global-diplomacy-un,global-diplomacy-un
|
| 259 |
+
Algorithmic Toolbox,University of California San Diego,https://www.coursera.org/learn/algorithmic-toolbox,algorithmic-toolbox
|
| 260 |
+
Troubles du spectre de l'autisme : diagnostic,University of Geneva,https://www.coursera.org/learn/troubles-spectre-autisme-diagnostic,troubles-spectre-autisme-diagnostic
|
| 261 |
+
Anatomy: Musculoskeletal and Integumentary Systems,University of Michigan,https://www.coursera.org/learn/anatomy403-1x,anatomy403-1x
|
| 262 |
+
Unraveling the Cycling City,University of Amsterdam,https://www.coursera.org/learn/unraveling-the-cycling-city,unraveling-the-cycling-city
|
| 263 |
+
A Crash Course in Causality: Inferring Causal Effects from Observational Data,University of Pennsylvania,https://www.coursera.org/learn/crash-course-in-causality,crash-course-in-causality
|
| 264 |
+
English for Business and Entrepreneurship,University of Pennsylvania,https://www.coursera.org/learn/business,business
|
| 265 |
+
Natural Language Processing with Attention Models,DeepLearning.AI,https://www.coursera.org/learn/attention-models-in-nlp,attention-models-in-nlp
|
| 266 |
+
What is Compliance?,University of Pennsylvania,https://www.coursera.org/learn/what-is-compliance,what-is-compliance
|
| 267 |
+
Getting Started with Google Sheets,Google Cloud,https://www.coursera.org/learn/getting-started-with-google-sheets,getting-started-with-google-sheets
|
| 268 |
+
Data Visualization with Python,IBM,https://www.coursera.org/learn/python-for-data-visualization,python-for-data-visualization
|
| 269 |
+
Foundations of Mindfulness,Rice University,https://www.coursera.org/learn/foundations-of-mindfulness,foundations-of-mindfulness
|
| 270 |
+
Negociación exitosa: Estrategias y habilidades esenciales (en español),University of Michigan,https://www.coursera.org/learn/negociacion,negociacion
|
| 271 |
+
Data-driven Decision Making,PwC,https://www.coursera.org/learn/decision-making,decision-making
|
| 272 |
+
Fundamentals of Engineering Exam Review,Georgia Institute of Technology,https://www.coursera.org/learn/fe-exam,fe-exam
|
| 273 |
+
Gender and Sexuality: Diversity and Inclusion in the Workplace,University of Pittsburgh,https://www.coursera.org/learn/gender-sexuality,gender-sexuality
|
| 274 |
+
Managerial Accounting Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-managerial-accounting,uva-darden-managerial-accounting
|
| 275 |
+
Search Engine Optimization Fundamentals,"University of California, Davis",https://www.coursera.org/learn/seo-fundamentals,seo-fundamentals
|
| 276 |
+
Essentials of Global Health,Yale University,https://www.coursera.org/learn/essentials-global-health,essentials-global-health
|
| 277 |
+
International Security Management,Erasmus University Rotterdam,https://www.coursera.org/learn/international-security-management,international-security-management
|
| 278 |
+
Getting Started with AWS Machine Learning,Amazon Web Services,https://www.coursera.org/learn/aws-machine-learning,aws-machine-learning
|
| 279 |
+
Arts and Heritage Management,Università Bocconi,https://www.coursera.org/learn/arts-heritage,arts-heritage
|
| 280 |
+
Understanding Einstein: The Special Theory of Relativity,Stanford University,https://www.coursera.org/learn/einstein-relativity,einstein-relativity
|
| 281 |
+
Réussir le Changement,ESSEC Business School,https://www.coursera.org/learn/reussir-le-changement,reussir-le-changement
|
| 282 |
+
Equine Welfare and Management,"University of California, Davis",https://www.coursera.org/learn/equine,equine
|
| 283 |
+
International migrations: a global issue,Sciences Po,https://www.coursera.org/learn/international-migrations,international-migrations
|
| 284 |
+
Introduction to Web Development,"University of California, Davis",https://www.coursera.org/learn/web-development,web-development
|
| 285 |
+
Writing and Editing: Word Choice and Word Order,University of Michigan,https://www.coursera.org/learn/writing-editing-words,writing-editing-words
|
| 286 |
+
Introduction to the Digital Advertising Landscape,University of Colorado Boulder,https://www.coursera.org/learn/digital-advertising-landscape,digital-advertising-landscape
|
| 287 |
+
Access Controls,(ISC)²,https://www.coursera.org/learn/access-control-sscp,access-control-sscp
|
| 288 |
+
Engineering Project Management: Initiating and Planning,Rice University,https://www.coursera.org/learn/initiating-planning,initiating-planning
|
| 289 |
+
Kotlin for Java Developers,JetBrains,https://www.coursera.org/learn/kotlin-for-java-developers,kotlin-for-java-developers
|
| 290 |
+
Mathematics for Machine Learning: Multivariate Calculus,Imperial College London,https://www.coursera.org/learn/multivariate-calculus-machine-learning,multivariate-calculus-machine-learning
|
| 291 |
+
Introduction to Git and GitHub,Google,https://www.coursera.org/learn/introduction-git-github,introduction-git-github
|
| 292 |
+
Industrial Biotechnology,University of Manchester ,https://www.coursera.org/learn/industrial-biotech,industrial-biotech
|
| 293 |
+
The Addicted Brain,Emory University,https://www.coursera.org/learn/addiction-and-the-brain,addiction-and-the-brain
|
| 294 |
+
Introducción a la programación en Python I: Aprendiendo a programar con Python,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/aprendiendo-programar-python,aprendiendo-programar-python
|
| 295 |
+
Modernizing Data Lakes and Data Warehouses with GCP,Google Cloud,https://www.coursera.org/learn/data-lakes-data-warehouses-gcp,data-lakes-data-warehouses-gcp
|
| 296 |
+
Drug Discovery,University of California San Diego,https://www.coursera.org/learn/drug-discovery,drug-discovery
|
| 297 |
+
Nutrition and Lifestyle in Pregnancy,Ludwig-Maximilians-Universität München (LMU),https://www.coursera.org/learn/nutrition-pregnancy,nutrition-pregnancy
|
| 298 |
+
Financial Acumen for Non-Financial Managers,University of Pennsylvania,https://www.coursera.org/learn/finance-healthcare-managers,finance-healthcare-managers
|
| 299 |
+
Python and Statistics for Financial Analysis,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/python-statistics-financial-analysis,python-statistics-financial-analysis
|
| 300 |
+
Bugs 101: Insect-Human Interactions,University of Alberta,https://www.coursera.org/learn/bugs-101,bugs-101
|
| 301 |
+
Autodesk Certified Professional: Revit for Architectural Design Exam Prep,Autodesk,https://www.coursera.org/learn/autodesk-revit-architectural-design,autodesk-revit-architectural-design
|
| 302 |
+
"Leading for Equity, Diversity and Inclusion in Higher Education",University of Michigan,https://www.coursera.org/learn/leading-for-equity-diversity-inclusion,leading-for-equity-diversity-inclusion
|
| 303 |
+
Digital Media and Marketing Strategies,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-plan,marketing-plan
|
| 304 |
+
Enterprise Architecture,Peter the Great St. Petersburg Polytechnic University,https://www.coursera.org/learn/enterprise-architecture,enterprise-architecture
|
| 305 |
+
Introduction to Spreadsheets and Models,University of Pennsylvania,https://www.coursera.org/learn/wharton-introduction-spreadsheets-models,wharton-introduction-spreadsheets-models
|
| 306 |
+
The Arts and Science of Relationships: Understanding Human Needs,University of Toronto,https://www.coursera.org/learn/human-needs,human-needs
|
| 307 |
+
Essentials in Clinical Simulations Across the Health Professions,The George Washington University,https://www.coursera.org/learn/clinicalsimulations,clinicalsimulations
|
| 308 |
+
Budgeting and Scheduling Projects,"University of California, Irvine",https://www.coursera.org/learn/schedule-projects,schedule-projects
|
| 309 |
+
Machine Learning for Business Professionals,Google Cloud,https://www.coursera.org/learn/machine-learning-business-professionals,machine-learning-business-professionals
|
| 310 |
+
Introduction to Accounting Data Analytics and Visualization,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/intro-accounting-data-analytics-visual,intro-accounting-data-analytics-visual
|
| 311 |
+
Spanish Vocabulary: Meeting People,"University of California, Davis",https://www.coursera.org/learn/spanish-vocabulary-meeting-people,spanish-vocabulary-meeting-people
|
| 312 |
+
Gestión Empresarial Exitosa para Pymes,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/gestionempresarialpyme,gestionempresarialpyme
|
| 313 |
+
Public Policy Challenges of the 21st Century,University of Virginia,https://www.coursera.org/learn/public-policy,public-policy
|
| 314 |
+
International Law in Action: the Arbitration of International Disputes,Universiteit Leiden,https://www.coursera.org/learn/arbitration-international-disputes,arbitration-international-disputes
|
| 315 |
+
Introduction to Ancient Egypt and Its Civilization,University of Pennsylvania,https://www.coursera.org/learn/introancientegypt,introancientegypt
|
| 316 |
+
Financing and Investing in Infrastructure,Università Bocconi,https://www.coursera.org/learn/infrastructure-investing,infrastructure-investing
|
| 317 |
+
Global Environmental Management,Technical University of Denmark (DTU),https://www.coursera.org/learn/global-environmental-management,global-environmental-management
|
| 318 |
+
Operations Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-operations-analytics,wharton-operations-analytics
|
| 319 |
+
Entrepreneurship Strategy: From Ideation to Exit,HEC Paris,https://www.coursera.org/learn/entrepreneurship-strategy,entrepreneurship-strategy
|
| 320 |
+
FinTech Law and Policy,Duke University,https://www.coursera.org/learn/fintechlawandpolicy,fintechlawandpolicy
|
| 321 |
+
The Social Context of Mental Health and Illness,University of Toronto,https://www.coursera.org/learn/mental-health,mental-health
|
| 322 |
+
What Is Contemporary Art?,The Museum of Modern Art,https://www.coursera.org/learn/contemporary-art,contemporary-art
|
| 323 |
+
The Art of Music Production,Berklee College of Music,https://www.coursera.org/learn/producing-music,producing-music
|
| 324 |
+
Biohacking Your Brain's Health,Emory University,https://www.coursera.org/learn/biohacking-your-brains-health,biohacking-your-brains-health
|
| 325 |
+
Bayesian Statistics: From Concept to Data Analysis,"University of California, Santa Cruz",https://www.coursera.org/learn/bayesian-statistics,bayesian-statistics
|
| 326 |
+
Reporting extra-financier et stratégie RSE,ESSEC Business School,https://www.coursera.org/learn/reporting-extra-financier-strategie-rse,reporting-extra-financier-strategie-rse
|
| 327 |
+
Leading Healthcare Quality and Safety,The George Washington University,https://www.coursera.org/learn/quality-healthcare,quality-healthcare
|
| 328 |
+
Understanding International Relations Theory,National Research University Higher School of Economics,https://www.coursera.org/learn/international-relations-theory,international-relations-theory
|
| 329 |
+
Introduction to Data Analytics,IBM,https://www.coursera.org/learn/introduction-to-data-analytics,introduction-to-data-analytics
|
| 330 |
+
Fundamentos de Excel para Negocios,Universidad Austral,https://www.coursera.org/learn/excel-para-negocios,excel-para-negocios
|
| 331 |
+
Elastic Google Cloud Infrastructure: Scaling and Automation,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-scaling-automation,gcp-infrastructure-scaling-automation
|
| 332 |
+
Cultural Competence - Aboriginal Sydney,The University of Sydney,https://www.coursera.org/learn/cultural-competence-aboriginal-sydney,cultural-competence-aboriginal-sydney
|
| 333 |
+
Fundamentos de Finanzas Empresariales,Universidad de los Andes,https://www.coursera.org/learn/finanzas-empresariales,finanzas-empresariales
|
| 334 |
+
Greening the Economy: Sustainable Cities,Lund University,https://www.coursera.org/learn/gte-sustainable-cities,gte-sustainable-cities
|
| 335 |
+
Introduction to Engineering Mechanics,Georgia Institute of Technology,https://www.coursera.org/learn/engineering-mechanics-statics,engineering-mechanics-statics
|
| 336 |
+
Design-Led Strategy: Design thinking for business strategy and entrepreneurship,The University of Sydney,https://www.coursera.org/learn/design-strategy,design-strategy
|
| 337 |
+
Biology Meets Programming: Bioinformatics for Beginners,University of California San Diego,https://www.coursera.org/learn/bioinformatics,bioinformatics
|
| 338 |
+
Understanding Medical Research: Your Facebook Friend is Wrong,Yale University,https://www.coursera.org/learn/medical-research,medical-research
|
| 339 |
+
Health Behavior Change: From Evidence to Action,Yale University,https://www.coursera.org/learn/health-behavior-change,health-behavior-change
|
| 340 |
+
Ordered Data Structures,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/cs-fundamentals-2,cs-fundamentals-2
|
| 341 |
+
Mindshift: Break Through Obstacles to Learning and Discover Your Hidden Potential,McMaster University,https://www.coursera.org/learn/mindshift,mindshift
|
| 342 |
+
Programming Fundamentals,Duke University,https://www.coursera.org/learn/programming-fundamentals,programming-fundamentals
|
| 343 |
+
Understanding Financial Markets,University of Geneva,https://www.coursera.org/learn/understanding-financial-markets,understanding-financial-markets
|
| 344 |
+
In the Studio: Postwar Abstract Painting,The Museum of Modern Art,https://www.coursera.org/learn/painting,painting
|
| 345 |
+
Drug Commercialization,University of California San Diego,https://www.coursera.org/learn/drug-commercialization,drug-commercialization
|
| 346 |
+
Introduction to Software Product Management,University of Alberta,https://www.coursera.org/learn/introduction-to-software-product-management,introduction-to-software-product-management
|
| 347 |
+
"Social Norms, Social Change I",Unicef,https://www.coursera.org/learn/norms,norms
|
| 348 |
+
Excel Skills for Business: Intermediate II,Macquarie University,https://www.coursera.org/learn/excel-intermediate-2,excel-intermediate-2
|
| 349 |
+
Aboriginal Worldviews and Education,University of Toronto,https://www.coursera.org/learn/aboriginal-education,aboriginal-education
|
| 350 |
+
"Information Systems Auditing, Controls and Assurance",The Hong Kong University of Science and Technology,https://www.coursera.org/learn/information-systems-audit,information-systems-audit
|
| 351 |
+
Six Sigma Principles,University System of Georgia,https://www.coursera.org/learn/six-sigma-principles,six-sigma-principles
|
| 352 |
+
Business Writing,University of Colorado Boulder,https://www.coursera.org/learn/writing-for-business,writing-for-business
|
| 353 |
+
Autodesk Certified Professional: AutoCAD for Design and Drafting Exam Prep,Autodesk,https://www.coursera.org/learn/autodesk-autocad-design-drafting,autodesk-autocad-design-drafting
|
| 354 |
+
Introduction to Typography,California Institute of the Arts,https://www.coursera.org/learn/typography,typography
|
| 355 |
+
Customer Segmentation and Prospecting,Northwestern University,https://www.coursera.org/learn/customer-segmentation-prospecting,customer-segmentation-prospecting
|
| 356 |
+
Claves para Gestionar Personas,IESE Business School,https://www.coursera.org/learn/gestionar-personas,gestionar-personas
|
| 357 |
+
English for Journalism,University of Pennsylvania,https://www.coursera.org/learn/journalism,journalism
|
| 358 |
+
How Things Work: An Introduction to Physics,University of Virginia,https://www.coursera.org/learn/how-things-work,how-things-work
|
| 359 |
+
Business English: Networking,University of Washington,https://www.coursera.org/learn/business-english-intro,business-english-intro
|
| 360 |
+
Summary Statistics in Public Health,Johns Hopkins University,https://www.coursera.org/learn/summary-statistics,summary-statistics
|
| 361 |
+
The Changing Global Order,Universiteit Leiden,https://www.coursera.org/learn/changing-global-order,changing-global-order
|
| 362 |
+
Global Energy and Climate Policy,University of London,https://www.coursera.org/learn/globalenergyandclimatepolicy,globalenergyandclimatepolicy
|
| 363 |
+
El Abogado del Futuro: Legaltech y la Transformación Digital del Derecho,Universidad Austral,https://www.coursera.org/learn/legaltech,legaltech
|
| 364 |
+
Probability and Statistics: To p or not to p?,University of London,https://www.coursera.org/learn/probability-statistics,probability-statistics
|
| 365 |
+
Gut Check: Exploring Your Microbiome,University of Colorado Boulder,https://www.coursera.org/learn/microbiome,microbiome
|
| 366 |
+
Econometrics: Methods and Applications,Erasmus University Rotterdam,https://www.coursera.org/learn/erasmus-econometrics,erasmus-econometrics
|
| 367 |
+
Разработка веб-сервисов на Go - основы языка,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/golang-webservices-1,golang-webservices-1
|
| 368 |
+
Mastering Data Analysis in Excel,Duke University,https://www.coursera.org/learn/analytics-excel,analytics-excel
|
| 369 |
+
Basic Statistics,University of Amsterdam,https://www.coursera.org/learn/basic-statistics,basic-statistics
|
| 370 |
+
"Capstone: Retrieving, Processing, and Visualizing Data with Python",University of Michigan,https://www.coursera.org/learn/python-data-visualization,python-data-visualization
|
| 371 |
+
Design Thinking for the Greater Good: Innovation in the Social Sector,University of Virginia,https://www.coursera.org/learn/uva-darden-design-thinking-social-sector,uva-darden-design-thinking-social-sector
|
| 372 |
+
Introduction to Portfolio Construction and Analysis with Python,EDHEC Business School,https://www.coursera.org/learn/introduction-portfolio-construction-python,introduction-portfolio-construction-python
|
| 373 |
+
Data Analytics for Lean Six Sigma,University of Amsterdam,https://www.coursera.org/learn/data-analytics-for-lean-six-sigma,data-analytics-for-lean-six-sigma
|
| 374 |
+
Refugees in the 21st Century,University of London,https://www.coursera.org/learn/refugees-21st-century,refugees-21st-century
|
| 375 |
+
Building Containerized Applications on AWS,Amazon Web Services,https://www.coursera.org/learn/containerized-apps-on-aws,containerized-apps-on-aws
|
| 376 |
+
Business Transformation with Google Cloud,Google Cloud,https://www.coursera.org/learn/business-transformation-google-cloud,business-transformation-google-cloud
|
| 377 |
+
Version Control with Git,Atlassian,https://www.coursera.org/learn/version-control-with-git,version-control-with-git
|
| 378 |
+
"Transmedia Storytelling: Narrative worlds, emerging technologies, and global audiences",UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/transmedia-storytelling,transmedia-storytelling
|
| 379 |
+
Excel aplicado a los negocios (Nivel Avanzado),Universidad Austral,https://www.coursera.org/learn/excel-aplicado-negocios-avanzado,excel-aplicado-negocios-avanzado
|
| 380 |
+
Introduction to Public Speaking,University of Washington,https://www.coursera.org/learn/public-speaking,public-speaking
|
| 381 |
+
Building Conversational Experiences with Dialogflow,Google Cloud,https://www.coursera.org/learn/conversational-experiences-dialogflow,conversational-experiences-dialogflow
|
| 382 |
+
Guitar for Beginners,Berklee College of Music,https://www.coursera.org/learn/guitar,guitar
|
| 383 |
+
Managing Project Risks and Changes,"University of California, Irvine",https://www.coursera.org/learn/project-risk-management,project-risk-management
|
| 384 |
+
L'excellence opérationnelle en pratique,ESSEC Business School,https://www.coursera.org/learn/excellence-operationnelle,excellence-operationnelle
|
| 385 |
+
Introduction to Cloud Computing,IBM,https://www.coursera.org/learn/introduction-to-cloud,introduction-to-cloud
|
| 386 |
+
Sample-based Learning Methods,Alberta Machine Intelligence Institute,https://www.coursera.org/learn/sample-based-learning-methods,sample-based-learning-methods
|
| 387 |
+
Functional Programming Principles in Scala,École Polytechnique Fédérale de Lausanne,https://www.coursera.org/learn/progfun1,progfun1
|
| 388 |
+
Introduction to Blockchain Technologies,INSEAD,https://www.coursera.org/learn/introduction-blockchain-technologies,introduction-blockchain-technologies
|
| 389 |
+
Introduction to Environmental Law and Policy,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/environmental-law,environmental-law
|
| 390 |
+
"Cameras, Exposure, and Photography",Michigan State University,https://www.coursera.org/learn/exposure-photography,exposure-photography
|
| 391 |
+
Democracia y decisiones públicas. Introducción al análisis de políticas públicas,Universitat Autònoma de Barcelona,https://www.coursera.org/learn/democracia,democracia
|
| 392 |
+
Dentistry 101,University of Michigan,https://www.coursera.org/learn/dentistry101,dentistry101
|
| 393 |
+
"Python Functions, Files, and Dictionaries",University of Michigan,https://www.coursera.org/learn/python-functions-files-dictionaries,python-functions-files-dictionaries
|
| 394 |
+
Anticorrupción: Introducción a conceptos y perspectiva práctica,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/anticorrupcion-introduccion,anticorrupcion-introduccion
|
| 395 |
+
Positive Psychology: Applications and Interventions,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-applications,positive-psychology-applications
|
| 396 |
+
Introduction to Embedded Systems Software and Development Environments,University of Colorado Boulder,https://www.coursera.org/learn/introduction-embedded-systems,introduction-embedded-systems
|
| 397 |
+
Personal & Family Financial Planning,University of Florida,https://www.coursera.org/learn/family-planning,family-planning
|
| 398 |
+
A Law Student's Toolkit,Yale University,https://www.coursera.org/learn/law-student,law-student
|
| 399 |
+
Introducción a Data Science: Programación Estadística con R,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/intro-data-science-programacion-estadistica-r,intro-data-science-programacion-estadistica-r
|
| 400 |
+
"Cybersecurity Roles, Processes & Operating System Security",IBM,https://www.coursera.org/learn/cybersecurity-roles-processes-operating-system-security,cybersecurity-roles-processes-operating-system-security
|
| 401 |
+
Computational Neuroscience,University of Washington,https://www.coursera.org/learn/computational-neuroscience,computational-neuroscience
|
| 402 |
+
De-Mystifying Mindfulness,Universiteit Leiden,https://www.coursera.org/learn/mindfulness,mindfulness
|
| 403 |
+
Smart Cities – Management of Smart Urban Infrastructures,École Polytechnique Fédérale de Lausanne,https://www.coursera.org/learn/smart-cities,smart-cities
|
| 404 |
+
Getting Started with Go,"University of California, Irvine",https://www.coursera.org/learn/golang-getting-started,golang-getting-started
|
| 405 |
+
Introduction to Economic Theories,Erasmus University Rotterdam,https://www.coursera.org/learn/intro-economic-theories,intro-economic-theories
|
| 406 |
+
Probabilistic Graphical Models 1: Representation,Stanford University,https://www.coursera.org/learn/probabilistic-graphical-models,probabilistic-graphical-models
|
| 407 |
+
The Power of Microeconomics: Economic Principles in the Real World,"University of California, Irvine",https://www.coursera.org/learn/principles-of-microeconomics,principles-of-microeconomics
|
| 408 |
+
Introduction to Personal Branding,University of Virginia,https://www.coursera.org/learn/personal-branding,personal-branding
|
| 409 |
+
Love as a Force for Social Justice,Stanford University,https://www.coursera.org/learn/love-social-justice,love-social-justice
|
| 410 |
+
Mathematical Thinking in Computer Science,University of California San Diego,https://www.coursera.org/learn/what-is-a-proof,what-is-a-proof
|
| 411 |
+
Introduction to Genetics and Evolution,Duke University,https://www.coursera.org/learn/genetics-evolution,genetics-evolution
|
| 412 |
+
Основы программирования на Python,National Research University Higher School of Economics,https://www.coursera.org/learn/python-osnovy-programmirovaniya,python-osnovy-programmirovaniya
|
| 413 |
+
Improving Communication Skills,University of Pennsylvania,https://www.coursera.org/learn/wharton-communication-skills,wharton-communication-skills
|
| 414 |
+
"Introduction to Trading, Machine Learning & GCP",New York Institute of Finance,https://www.coursera.org/learn/introduction-trading-machine-learning-gcp,introduction-trading-machine-learning-gcp
|
| 415 |
+
Python Programming: A Concise Introduction,Wesleyan University,https://www.coursera.org/learn/python-programming-introduction,python-programming-introduction
|
| 416 |
+
"The Modern World, Part Two: Global History since 1910",University of Virginia,https://www.coursera.org/learn/modern-world-2,modern-world-2
|
| 417 |
+
Understanding Plants - Part I: What a Plant Knows,Tel Aviv University,https://www.coursera.org/learn/plantknows,plantknows
|
| 418 |
+
Excel Fundamentals for Data Analysis,Macquarie University,https://www.coursera.org/learn/excel-data-analysis-fundamentals,excel-data-analysis-fundamentals
|
| 419 |
+
Finanzas personales,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/finanzas-personales,finanzas-personales
|
| 420 |
+
English Composition I,Duke University,https://www.coursera.org/learn/english-composition,english-composition
|
| 421 |
+
Career 911: Your Future Job in Medicine and Healthcare,Northwestern University,https://www.coursera.org/learn/healthcarejobs,healthcarejobs
|
| 422 |
+
Introduction to Self-Driving Cars,University of Toronto,https://www.coursera.org/learn/intro-self-driving-cars,intro-self-driving-cars
|
| 423 |
+
Corporate & Commercial Law I: Contracts & Employment Law,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/corporate-commercial-law-part1,corporate-commercial-law-part1
|
| 424 |
+
Blockchain Basics,University at Buffalo,https://www.coursera.org/learn/blockchain-basics,blockchain-basics
|
| 425 |
+
Foundations of Business Strategy,University of Virginia,https://www.coursera.org/learn/uva-darden-foundations-business-strategy,uva-darden-foundations-business-strategy
|
| 426 |
+
Introdução à Ciência da Computação com Python Parte 1,Universidade de São Paulo,https://www.coursera.org/learn/ciencia-computacao-python-conceitos,ciencia-computacao-python-conceitos
|
| 427 |
+
Stochastic processes,National Research University Higher School of Economics,https://www.coursera.org/learn/stochasticprocesses,stochasticprocesses
|
| 428 |
+
Foundations for Big Data Analysis with SQL,Cloudera,https://www.coursera.org/learn/foundations-big-data-analysis-sql,foundations-big-data-analysis-sql
|
| 429 |
+
"Innovation Through Design: Think, Make, Break, Repeat",The University of Sydney,https://www.coursera.org/learn/innovation-through-design,innovation-through-design
|
| 430 |
+
Perfect Tenses and Modals,"University of California, Irvine",https://www.coursera.org/learn/perfect-tenses-modals,perfect-tenses-modals
|
| 431 |
+
Getting Started with Azure,LearnQuest,https://www.coursera.org/learn/cloud-azure-intro,cloud-azure-intro
|
| 432 |
+
Moralities of Everyday Life,Yale University,https://www.coursera.org/learn/moralities,moralities
|
| 433 |
+
Revisão Sistemática e Meta-análise,Universidade Estadual de Campinas,https://www.coursera.org/learn/revisao-sistematica,revisao-sistematica
|
| 434 |
+
Understanding child development: from synapse to society,Utrecht University,https://www.coursera.org/learn/child-development,child-development
|
| 435 |
+
Introduction to G Suite,Google Cloud,https://www.coursera.org/learn/introduction-g-suite,introduction-g-suite
|
| 436 |
+
Aprendiendo a aprender: Poderosas herramientas mentales con las que podrás dominar temas difíciles (Learning How to Learn),University of California San Diego,https://www.coursera.org/learn/aprendiendo-a-aprender,aprendiendo-a-aprender
|
| 437 |
+
Building Batch Data Pipelines on GCP,Google Cloud,https://www.coursera.org/learn/batch-data-pipelines-gcp,batch-data-pipelines-gcp
|
| 438 |
+
Financial Planning for Young Adults,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/financial-planning,financial-planning
|
| 439 |
+
Quantitative Methods,University of Amsterdam,https://www.coursera.org/learn/quantitative-methods,quantitative-methods
|
| 440 |
+
"Introduction to Self-Determination Theory: An approach to motivation, development and wellness",University of Rochester,https://www.coursera.org/learn/self-determination-theory,self-determination-theory
|
| 441 |
+
The Technology of Music Production,Berklee College of Music,https://www.coursera.org/learn/technology-of-music-production,technology-of-music-production
|
| 442 |
+
Code Yourself! An Introduction to Programming,The University of Edinburgh,https://www.coursera.org/learn/intro-programming,intro-programming
|
| 443 |
+
Success,University of Pennsylvania,https://www.coursera.org/learn/wharton-success,wharton-success
|
| 444 |
+
Chemicals and Health,Johns Hopkins University,https://www.coursera.org/learn/chemicals-health,chemicals-health
|
| 445 |
+
Improving your statistical inferences,Eindhoven University of Technology,https://www.coursera.org/learn/statistical-inferences,statistical-inferences
|
| 446 |
+
Fundamentals of Finance,University of Pennsylvania,https://www.coursera.org/learn/finance-fundamentals,finance-fundamentals
|
| 447 |
+
How Google does Machine Learning,Google Cloud,https://www.coursera.org/learn/google-machine-learning,google-machine-learning
|
| 448 |
+
Object-Oriented Design,University of Alberta,https://www.coursera.org/learn/object-oriented-design,object-oriented-design
|
| 449 |
+
Introduction to Intellectual Property,University of Pennsylvania,https://www.coursera.org/learn/introduction-intellectual-property,introduction-intellectual-property
|
| 450 |
+
Cost and Economics in Pricing Strategy,BCG,https://www.coursera.org/learn/uva-darden-bcg-pricing-strategy-cost-economics,uva-darden-bcg-pricing-strategy-cost-economics
|
| 451 |
+
Write A Feature Length Screenplay For Film Or Television,Michigan State University,https://www.coursera.org/learn/write-a-feature-length-screenplay-for-film-or-television,write-a-feature-length-screenplay-for-film-or-television
|
| 452 |
+
Marketing Gerencial,Universidad de Chile,https://www.coursera.org/learn/marketing-gerencial,marketing-gerencial
|
| 453 |
+
Corporate Finance Essentials,IESE Business School,https://www.coursera.org/learn/corporate-finance-essentials,corporate-finance-essentials
|
| 454 |
+
Information Security: Context and Introduction,"Royal Holloway, University of London",https://www.coursera.org/learn/information-security-data,information-security-data
|
| 455 |
+
"Anatomy of the Chest, Abdomen, and Pelvis",Yale University,https://www.coursera.org/learn/trunk-anatomy,trunk-anatomy
|
| 456 |
+
Introduction to CSS3,University of Michigan,https://www.coursera.org/learn/introcss,introcss
|
| 457 |
+
Applied Data Science Capstone,IBM,https://www.coursera.org/learn/applied-data-science-capstone,applied-data-science-capstone
|
| 458 |
+
Introduction aux Droits de l’Homme,University of Geneva,https://www.coursera.org/learn/droits-de-lhomme,droits-de-lhomme
|
| 459 |
+
"Programming Languages, Part A",University of Washington,https://www.coursera.org/learn/programming-languages,programming-languages
|
| 460 |
+
Big History: Connecting Knowledge,Macquarie University,https://www.coursera.org/learn/big-history,big-history
|
| 461 |
+
Leadership in 21st Century Organizations,Copenhagen Business School,https://www.coursera.org/learn/leadership-21st-century,leadership-21st-century
|
| 462 |
+
Software Processes and Agile Practices,University of Alberta,https://www.coursera.org/learn/software-processes-and-agile-practices,software-processes-and-agile-practices
|
| 463 |
+
DevOps Culture and Mindset,"University of California, Davis",https://www.coursera.org/learn/devops-culture-and-mindset,devops-culture-and-mindset
|
| 464 |
+
Introduction to Statistics & Data Analysis in Public Health,Imperial College London,https://www.coursera.org/learn/introduction-statistics-data-analysis-public-health,introduction-statistics-data-analysis-public-health
|
| 465 |
+
Discrete Optimization,The University of Melbourne,https://www.coursera.org/learn/discrete-optimization,discrete-optimization
|
| 466 |
+
Основы разработки на C++: белый пояс,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/c-plus-plus-white,c-plus-plus-white
|
| 467 |
+
COVID-19 Contact Tracing For Nursing Professionals,University of Houston,https://www.coursera.org/learn/covid-19-contact-tracing-for-nursing-professionals,covid-19-contact-tracing-for-nursing-professionals
|
| 468 |
+
High Stakes Leadership: Leading in Times of Crisis,University of Michigan,https://www.coursera.org/learn/high-stakes-leadership,high-stakes-leadership
|
| 469 |
+
Essential Epidemiologic Tools for Public Health Practice,Johns Hopkins University,https://www.coursera.org/learn/epidemiology-tools,epidemiology-tools
|
| 470 |
+
Epigenetic Control of Gene Expression,The University of Melbourne,https://www.coursera.org/learn/epigenetics,epigenetics
|
| 471 |
+
"Recruiting, Hiring, and Onboarding Employees",University of Minnesota,https://www.coursera.org/learn/recruiting-hiring-onboarding-employees,recruiting-hiring-onboarding-employees
|
| 472 |
+
AWS Fundamentals: Addressing Security Risk,Amazon Web Services,https://www.coursera.org/learn/aws-fundamentals-addressing-security-risk,aws-fundamentals-addressing-security-risk
|
| 473 |
+
Ancient Philosophy: Plato & His Predecessors,University of Pennsylvania,https://www.coursera.org/learn/plato,plato
|
| 474 |
+
Becoming a changemaker: Introduction to Social Innovation,University of Cape Town,https://www.coursera.org/learn/social-innovation,social-innovation
|
| 475 |
+
Spanish for Successful Communication in Healthcare Settings,Rice University,https://www.coursera.org/learn/spanish-in-healthcare-settings,spanish-in-healthcare-settings
|
| 476 |
+
Leading transformations: Manage change,Macquarie University,https://www.coursera.org/learn/change-management,change-management
|
| 477 |
+
Introduction to Systems Engineering,UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/systems-engineering,systems-engineering
|
| 478 |
+
Digital Marketing Analytics in Theory,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-analytics,marketing-analytics
|
| 479 |
+
Data Visualization and Communication with Tableau,Duke University,https://www.coursera.org/learn/analytics-tableau,analytics-tableau
|
| 480 |
+
Bayesian Statistics: Techniques and Models,"University of California, Santa Cruz",https://www.coursera.org/learn/mcmc-bayesian-statistics,mcmc-bayesian-statistics
|
| 481 |
+
Human Rights for Open Societies,Utrecht University,https://www.coursera.org/learn/humanrights,humanrights
|
| 482 |
+
Introduction to Computers and Office Productivity Software,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/introduction-to-computers-and-office-productivity-software,introduction-to-computers-and-office-productivity-software
|
| 483 |
+
The Introduction to Quantum Computing,Saint Petersburg State University,https://www.coursera.org/learn/quantum-computing-algorithms,quantum-computing-algorithms
|
| 484 |
+
Intercultural Management,ESCP Business School,https://www.coursera.org/learn/intercultural,intercultural
|
| 485 |
+
Get Interactive: Practical Teaching with Technology,University of London,https://www.coursera.org/learn/getinmooc,getinmooc
|
| 486 |
+
International Law In Action: Investigating and Prosecuting International Crimes,Universiteit Leiden,https://www.coursera.org/learn/international-law-in-action-2,international-law-in-action-2
|
| 487 |
+
Global Financial Markets and Instruments,Rice University,https://www.coursera.org/learn/global-financial-markets-instruments,global-financial-markets-instruments
|
| 488 |
+
Write Your First Novel,Michigan State University,https://www.coursera.org/learn/write-your-first-novel,write-your-first-novel
|
| 489 |
+
Développement psychologique de l'enfant,University of Geneva,https://www.coursera.org/learn/enfant-developpement,enfant-developpement
|
| 490 |
+
Songwriting: Writing the Lyrics,Berklee College of Music,https://www.coursera.org/learn/songwriting-lyrics,songwriting-lyrics
|
| 491 |
+
"Applied Plotting, Charting & Data Representation in Python",University of Michigan,https://www.coursera.org/learn/python-plotting,python-plotting
|
| 492 |
+
Systems Thinking In Public Health,Johns Hopkins University,https://www.coursera.org/learn/systems-thinking,systems-thinking
|
| 493 |
+
Excel Skills for Business: Advanced,Macquarie University,https://www.coursera.org/learn/excel-advanced,excel-advanced
|
| 494 |
+
Introduction to Neuroeconomics: How the Brain Makes Decisions,National Research University Higher School of Economics,https://www.coursera.org/learn/neuroeconomics,neuroeconomics
|
| 495 |
+
Community Organizing for Social Justice,University of Michigan,https://www.coursera.org/learn/community-organizing,community-organizing
|
| 496 |
+
Build a Modern Computer from First Principles: Nand to Tetris Part II (project-centered course),Hebrew University of Jerusalem,https://www.coursera.org/learn/nand2tetris2,nand2tetris2
|
| 497 |
+
The Global Financial Crisis,Yale University,https://www.coursera.org/learn/global-financial-crisis,global-financial-crisis
|
| 498 |
+
Agile и Scrum в работе над проектами и продуктами,E-Learning Development Fund,https://www.coursera.org/learn/upravleniya-proektami-agile-scrum,upravleniya-proektami-agile-scrum
|
| 499 |
+
Renewable Energy and Green Building Entrepreneurship,Duke University,https://www.coursera.org/learn/renewable-energy-entrepreneurship,renewable-energy-entrepreneurship
|
| 500 |
+
The Cycle: Management of Successful Arts and Cultural Organizations,"University of Maryland, College Park",https://www.coursera.org/learn/the-cycle,the-cycle
|
| 501 |
+
Privacy Law and Data Protection,University of Pennsylvania,https://www.coursera.org/learn/privacy-law-data-protection,privacy-law-data-protection
|
| 502 |
+
Building Modern Python Applications on AWS,Amazon Web Services,https://www.coursera.org/learn/building-modern-python-applications-on-aws,building-modern-python-applications-on-aws
|
| 503 |
+
Digital Business Models,Lund University,https://www.coursera.org/learn/digital-business-models,digital-business-models
|
| 504 |
+
"Everyday Excel, Part 2",University of Colorado Boulder,https://www.coursera.org/learn/everyday-excel-part-2,everyday-excel-part-2
|
| 505 |
+
Reliable Google Cloud Infrastructure: Design and Process,Google Cloud,https://www.coursera.org/learn/cloud-infrastructure-design-process,cloud-infrastructure-design-process
|
| 506 |
+
Introduction to Computer Programming,University of London,https://www.coursera.org/learn/introduction-to-computer-programming,introduction-to-computer-programming
|
| 507 |
+
"Big Data Essentials: HDFS, MapReduce and Spark RDD",Yandex,https://www.coursera.org/learn/big-data-essentials,big-data-essentials
|
| 508 |
+
Dermatology: Trip to skin,Novosibirsk State University ,https://www.coursera.org/learn/dermatology,dermatology
|
| 509 |
+
Sustainable Tourism – promoting environmental public health,University of Copenhagen,https://www.coursera.org/learn/sustainable-tourism,sustainable-tourism
|
| 510 |
+
Population Health During A Pandemic: Contact Tracing and Beyond,University of Houston,https://www.coursera.org/learn/contact-tracing-for-covid-19,contact-tracing-for-covid-19
|
| 511 |
+
Social Impact Strategy: Tools for Entrepreneurs and Innovators,University of Pennsylvania,https://www.coursera.org/learn/social-impact,social-impact
|
| 512 |
+
C for Everyone: Programming Fundamentals,"University of California, Santa Cruz",https://www.coursera.org/learn/c-for-everyone,c-for-everyone
|
| 513 |
+
Introduction to Structured Query Language (SQL),University of Michigan,https://www.coursera.org/learn/intro-sql,intro-sql
|
| 514 |
+
Social and Economic Networks: Models and Analysis,Stanford University,https://www.coursera.org/learn/social-economic-networks,social-economic-networks
|
| 515 |
+
The Truth About Cats and Dogs,The University of Edinburgh,https://www.coursera.org/learn/cats-and-dogs,cats-and-dogs
|
| 516 |
+
Sports and Society,Duke University,https://www.coursera.org/learn/sports-society,sports-society
|
| 517 |
+
Fundamentals of Scalable Data Science,IBM,https://www.coursera.org/learn/ds,ds
|
| 518 |
+
Effective Compliance Programs,University of Pennsylvania,https://www.coursera.org/learn/effective-compliance-programs,effective-compliance-programs
|
| 519 |
+
Transformation of the Global Food System,University of Copenhagen,https://www.coursera.org/learn/transformation-global-food-system,transformation-global-food-system
|
| 520 |
+
Web Application Technologies and Django,University of Michigan,https://www.coursera.org/learn/django-database-web-apps,django-database-web-apps
|
| 521 |
+
Curanderismo: Traditional Healing Using Plants,University of New Mexico,https://www.coursera.org/learn/curanderismo-plants,curanderismo-plants
|
| 522 |
+
Applied Machine Learning in Python,University of Michigan,https://www.coursera.org/learn/python-machine-learning,python-machine-learning
|
| 523 |
+
Troubleshooting and Debugging Techniques,Google,https://www.coursera.org/learn/troubleshooting-debugging-techniques,troubleshooting-debugging-techniques
|
| 524 |
+
Introduction to C# Programming and Unity,University of Colorado System,https://www.coursera.org/learn/introduction-programming-unity,introduction-programming-unity
|
| 525 |
+
"Corrección, estilo y variaciones de la lengua española",Universitat Autònoma de Barcelona,https://www.coursera.org/learn/correccion-estilo-variaciones,correccion-estilo-variaciones
|
| 526 |
+
Les Fondamentaux de la Négociation,ESSEC Business School,https://www.coursera.org/learn/fondamentaux-negociation,fondamentaux-negociation
|
| 527 |
+
Introduction to Clinical Data,Stanford University,https://www.coursera.org/learn/introduction-clinical-data,introduction-clinical-data
|
| 528 |
+
The Science of Success: What Researchers Know that You Should Know,University of Michigan,https://www.coursera.org/learn/success,success
|
| 529 |
+
Finance for Non-Financial Managers,Emory University,https://www.coursera.org/learn/finance-for-non-financial-managers,finance-for-non-financial-managers
|
| 530 |
+
Getting Started With Music Theory,Michigan State University,https://www.coursera.org/learn/music-theory,music-theory
|
| 531 |
+
Digital Marketing Analytics in Practice,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/digital-analytics,digital-analytics
|
| 532 |
+
The Horse Course: Introduction to Basic Care and Management,University of Florida,https://www.coursera.org/learn/horse-care,horse-care
|
| 533 |
+
Verb Tenses and Passives,"University of California, Irvine",https://www.coursera.org/learn/verb-passives,verb-passives
|
| 534 |
+
Gestión de organizaciones efectivas,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/gestion-organizaciones-efectivas,gestion-organizaciones-efectivas
|
| 535 |
+
Introduction to Big Data,University of California San Diego,https://www.coursera.org/learn/big-data-introduction,big-data-introduction
|
| 536 |
+
Rédaction de contrats,University of Geneva,https://www.coursera.org/learn/contrats,contrats
|
| 537 |
+
Philosophy and the Sciences: Introduction to the Philosophy of Cognitive Sciences,The University of Edinburgh,https://www.coursera.org/learn/philosophy-cognitive-sciences,philosophy-cognitive-sciences
|
| 538 |
+
Schizophrenia,Wesleyan University,https://www.coursera.org/learn/schizophrenia,schizophrenia
|
| 539 |
+
Ecology: Ecosystem Dynamics and Conservation,Howard Hughes Medical Institute ,https://www.coursera.org/learn/ecology-conservation,ecology-conservation
|
| 540 |
+
Introduction to Game Development,Michigan State University,https://www.coursera.org/learn/game-development,game-development
|
| 541 |
+
Practical Time Series Analysis,The State University of New York,https://www.coursera.org/learn/practical-time-series-analysis,practical-time-series-analysis
|
| 542 |
+
VLSI CAD Part I: Logic,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/vlsi-cad-logic,vlsi-cad-logic
|
| 543 |
+
Getting Started with Google Kubernetes Engine,Google Cloud,https://www.coursera.org/learn/google-kubernetes-engine,google-kubernetes-engine
|
| 544 |
+
Exploring and Preparing your Data with BigQuery,Google Cloud,https://www.coursera.org/learn/gcp-exploring-preparing-data-bigquery,gcp-exploring-preparing-data-bigquery
|
| 545 |
+
Six Sigma and the Organization (Advanced),University System of Georgia,https://www.coursera.org/learn/six-sigma-organization-advanced,six-sigma-organization-advanced
|
| 546 |
+
Social Work Practice: Advocating Social Justice and Change,University of Michigan,https://www.coursera.org/learn/social-work-practice-advocating-social-justice-and-change,social-work-practice-advocating-social-justice-and-change
|
| 547 |
+
Introduction to Machine Learning,Duke University,https://www.coursera.org/learn/machine-learning-duke,machine-learning-duke
|
| 548 |
+
Entrepreneurship 2: Launching your Start-Up,University of Pennsylvania,https://www.coursera.org/learn/wharton-launching-startup,wharton-launching-startup
|
| 549 |
+
Nanotechnology: A Maker’s Course,Duke University,https://www.coursera.org/learn/nanotechnology,nanotechnology
|
| 550 |
+
Creative Problem Solving,University of Minnesota,https://www.coursera.org/learn/creative-problem-solving,creative-problem-solving
|
| 551 |
+
"Sleep: Neurobiology, Medicine, and Society",University of Michigan,https://www.coursera.org/learn/sleep,sleep
|
| 552 |
+
COVID-19: What You Need to Know (CME Eligible),Osmosis,https://www.coursera.org/learn/covid-19-what-you-need-to-know,covid-19-what-you-need-to-know
|
| 553 |
+
Classical Sociological Theory,University of Amsterdam,https://www.coursera.org/learn/classical-sociological-theory,classical-sociological-theory
|
| 554 |
+
Electric Industry Operations and Markets,Duke University,https://www.coursera.org/learn/electricity,electricity
|
| 555 |
+
Preparing for the Google Cloud Professional Cloud Architect Exam,Google Cloud,https://www.coursera.org/learn/preparing-cloud-professional-cloud-architect-exam,preparing-cloud-professional-cloud-architect-exam
|
| 556 |
+
Effective Business Presentations with Powerpoint,PwC,https://www.coursera.org/learn/powerpoint-presentations,powerpoint-presentations
|
| 557 |
+
More Introduction to Financial Accounting,University of Pennsylvania,https://www.coursera.org/learn/wharton-financial-accounting,wharton-financial-accounting
|
| 558 |
+
Cryptocurrency and Blockchain: An Introduction to Digital Currencies,University of Pennsylvania,https://www.coursera.org/learn/wharton-cryptocurrency-blockchain-introduction-digital-currency,wharton-cryptocurrency-blockchain-introduction-digital-currency
|
| 559 |
+
Dairy Production and Management,The Pennsylvania State University,https://www.coursera.org/learn/dairy-production,dairy-production
|
| 560 |
+
Think Again I: How to Understand Arguments,Duke University,https://www.coursera.org/learn/understanding-arguments,understanding-arguments
|
| 561 |
+
Developing Your Musicianship,Berklee College of Music,https://www.coursera.org/learn/develop-your-musicianship,develop-your-musicianship
|
| 562 |
+
Introduction to Operations Management,University of Pennsylvania,https://www.coursera.org/learn/wharton-operations,wharton-operations
|
| 563 |
+
The Oral Cavity: Portal to Health and Disease,University of Pennsylvania,https://www.coursera.org/learn/oralcavity,oralcavity
|
| 564 |
+
Preparing for the Google Cloud Associate Cloud Engineer Exam,Google Cloud,https://www.coursera.org/learn/preparing-cloud-associate-cloud-engineer-exam,preparing-cloud-associate-cloud-engineer-exam
|
| 565 |
+
Fundamental Neuroscience for Neuroimaging,Johns Hopkins University,https://www.coursera.org/learn/neuroscience-neuroimaging,neuroscience-neuroimaging
|
| 566 |
+
European Business Law: Understanding the Fundamentals,Lund University,https://www.coursera.org/learn/european-law-fundamentals,european-law-fundamentals
|
| 567 |
+
Teach English Now! Teaching Language Online,Arizona State University,https://www.coursera.org/learn/teachlanguageonline,teachlanguageonline
|
| 568 |
+
Front-End Web Development with React,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/front-end-react,front-end-react
|
| 569 |
+
Music Business Foundations,Berklee College of Music,https://www.coursera.org/learn/music-business-foundations,music-business-foundations
|
| 570 |
+
Introduction to Business Analytics: Communicating with Data,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/intro-business-analytics,intro-business-analytics
|
| 571 |
+
Security & Safety Challenges in a Globalized World,Universiteit Leiden,https://www.coursera.org/learn/security-safety-globalized-world,security-safety-globalized-world
|
| 572 |
+
Machine Learning Foundations: A Case Study Approach,University of Washington,https://www.coursera.org/learn/ml-foundations,ml-foundations
|
| 573 |
+
Accounting Analytics,University of Pennsylvania,https://www.coursera.org/learn/accounting-analytics,accounting-analytics
|
| 574 |
+
Strategic Business Management - Microeconomics,"University of California, Irvine",https://www.coursera.org/learn/strategic-business-management-microeconomics,strategic-business-management-microeconomics
|
| 575 |
+
"Epidemics, Pandemics and Outbreaks",University of Pittsburgh,https://www.coursera.org/learn/epidemic-pandemic-outbreak,epidemic-pandemic-outbreak
|
| 576 |
+
Roman Architecture,Yale University,https://www.coursera.org/learn/roman-architecture,roman-architecture
|
| 577 |
+
Research Data Management and Sharing,The University of Edinburgh,https://www.coursera.org/learn/data-management,data-management
|
| 578 |
+
Introduction to Genomic Technologies,Johns Hopkins University,https://www.coursera.org/learn/introduction-genomics,introduction-genomics
|
| 579 |
+
Strategic Management,Copenhagen Business School,https://www.coursera.org/learn/strategic-management,strategic-management
|
| 580 |
+
Cybersecurity Compliance Framework & System Administration,IBM,https://www.coursera.org/learn/cybersecurity-compliance-framework-system-administration,cybersecurity-compliance-framework-system-administration
|
| 581 |
+
Legal Tech & Startups,IE Business School,https://www.coursera.org/learn/legal-tech-startups,legal-tech-startups
|
| 582 |
+
Introduction to Chemistry: Reactions and Ratios,Duke University,https://www.coursera.org/learn/intro-chemistry,intro-chemistry
|
| 583 |
+
The Science of Stem Cells,American Museum of Natural History,https://www.coursera.org/learn/stem-cells,stem-cells
|
| 584 |
+
The Business of Product Management I,Advancing Women in Product,https://www.coursera.org/learn/the-business-of-product-management-one,the-business-of-product-management-one
|
| 585 |
+
Positive Psychology: Resilience Skills,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-resilience,positive-psychology-resilience
|
| 586 |
+
AI for Medical Prognosis,DeepLearning.AI,https://www.coursera.org/learn/ai-for-medical-prognosis,ai-for-medical-prognosis
|
| 587 |
+
Antibiotic Stewardship,Stanford University,https://www.coursera.org/learn/antibiotic-stewardship,antibiotic-stewardship
|
| 588 |
+
UX / UI: Fundamentos para o design de interface,Universidade de São Paulo,https://www.coursera.org/learn/ux-ui-design-de-interface,ux-ui-design-de-interface
|
| 589 |
+
EMT Foundations,University of Colorado System,https://www.coursera.org/learn/emt-foundations,emt-foundations
|
| 590 |
+
Industrial IoT on Google Cloud Platform,Google Cloud,https://www.coursera.org/learn/iiot-google-cloud-platform,iiot-google-cloud-platform
|
| 591 |
+
"Penetration Testing, Incident Response and Forensics",IBM,https://www.coursera.org/learn/ibm-penetration-testing-incident-response-forensics,ibm-penetration-testing-incident-response-forensics
|
| 592 |
+
Database Management Essentials,University of Colorado System,https://www.coursera.org/learn/database-management,database-management
|
| 593 |
+
Advertising and Society,Duke University,https://www.coursera.org/learn/role-of-advertising,role-of-advertising
|
| 594 |
+
Everyday Chinese Medicine,The Chinese University of Hong Kong,https://www.coursera.org/learn/everyday-chinese-medicine,everyday-chinese-medicine
|
| 595 |
+
Fundamentals of Machine Learning for Healthcare,Stanford University,https://www.coursera.org/learn/fundamental-machine-learning-healthcare,fundamental-machine-learning-healthcare
|
| 596 |
+
New Approaches to Countering Terror: Countering Violent Extremism,"University of Maryland, College Park",https://www.coursera.org/learn/countering-terror-violent-extremism,countering-terror-violent-extremism
|
| 597 |
+
Magic in the Middle Ages,Universitat de Barcelona,https://www.coursera.org/learn/magic-middle-ages,magic-middle-ages
|
| 598 |
+
Challenging Forensic Science: How Science Should Speak to Court,University of Lausanne,https://www.coursera.org/learn/challenging-forensic-science,challenging-forensic-science
|
| 599 |
+
Exploring Renewable Energy Schemes,University of Pennsylvania,https://www.coursera.org/learn/exploring-renewable-energy,exploring-renewable-energy
|
| 600 |
+
Paleontology: Theropod Dinosaurs and the Origin of Birds,University of Alberta,https://www.coursera.org/learn/theropods-birds,theropods-birds
|
| 601 |
+
Corporate Strategy,UCL School of Management,https://www.coursera.org/learn/corporatestrategy,corporatestrategy
|
| 602 |
+
Getting Started with Essay Writing,"University of California, Irvine",https://www.coursera.org/learn/getting-started-with-essay-writing,getting-started-with-essay-writing
|
| 603 |
+
Actualización en el manejo del paciente con diabetes mellitus tipo 2,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/actualizacion-manejo-diabetes-tipo-2,actualizacion-manejo-diabetes-tipo-2
|
| 604 |
+
Mastering Final Cut Pro,LearnQuest,https://www.coursera.org/learn/mastering-final-cut-pro,mastering-final-cut-pro
|
| 605 |
+
Removing Barriers to Change,University of Pennsylvania,https://www.coursera.org/learn/removing-barriers-to-change,removing-barriers-to-change
|
| 606 |
+
Tricky American English Pronunciation,"University of California, Irvine",https://www.coursera.org/learn/tricky-american-english-pronunciation,tricky-american-english-pronunciation
|
| 607 |
+
Managing an Agile Team,University of Virginia,https://www.coursera.org/learn/uva-darden-agile-team-management,uva-darden-agile-team-management
|
| 608 |
+
International Law in Action: A Guide to the International Courts and Tribunals in The Hague,Universiteit Leiden,https://www.coursera.org/learn/international-law-in-action,international-law-in-action
|
| 609 |
+
"Competencias Laborales: Perfiles, Evaluación y Capacitación.",Universidad de Chile,https://www.coursera.org/learn/competencias-lab,competencias-lab
|
| 610 |
+
Getting started with TensorFlow 2,Imperial College London,https://www.coursera.org/learn/getting-started-with-tensor-flow2,getting-started-with-tensor-flow2
|
| 611 |
+
Geopolitics of Europe,Sciences Po,https://www.coursera.org/learn/geopolitics-europe,geopolitics-europe
|
| 612 |
+
Osteoarchaeology: The Truth in Our Bones,Universiteit Leiden,https://www.coursera.org/learn/truthinourbones-osteoarchaeology-archaeology,truthinourbones-osteoarchaeology-archaeology
|
| 613 |
+
Mathematics for Machine Learning: PCA,Imperial College London,https://www.coursera.org/learn/pca-machine-learning,pca-machine-learning
|
| 614 |
+
Object Oriented Programming in Java,University of California San Diego,https://www.coursera.org/learn/object-oriented-java,object-oriented-java
|
| 615 |
+
Embedded Software and Hardware Architecture,University of Colorado Boulder,https://www.coursera.org/learn/embedded-software-hardware,embedded-software-hardware
|
| 616 |
+
Memoir and Personal Essay: Managing Your Relationship with the Reader,Wesleyan University,https://www.coursera.org/learn/memoir-reader-relationship,memoir-reader-relationship
|
| 617 |
+
Математика и Python для анализа данных,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/mathematics-and-python,mathematics-and-python
|
| 618 |
+
Hacia una práctica constructivista en el aula,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/aulaconstructivista,aulaconstructivista
|
| 619 |
+
Hypothesis-Driven Development,University of Virginia,https://www.coursera.org/learn/uva-darden-agile-testing,uva-darden-agile-testing
|
| 620 |
+
Accounting Data Analytics with Python,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/accounting-data-analytics-python,accounting-data-analytics-python
|
| 621 |
+
Introduction to Molecular Spectroscopy,University of Manchester ,https://www.coursera.org/learn/spectroscopy,spectroscopy
|
| 622 |
+
Managing as a Coach,"University of California, Davis",https://www.coursera.org/learn/managing-as-a-coach,managing-as-a-coach
|
| 623 |
+
The fundamentals of hotel distribution,ESSEC Business School,https://www.coursera.org/learn/hotel-distribution,hotel-distribution
|
| 624 |
+
A Crash Course in Data Science,Johns Hopkins University,https://www.coursera.org/learn/data-science-course,data-science-course
|
course_feedback_nlp/Untitled.ipynb
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "2c0bc557-3218-4715-900e-491cc5560b6a",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import pandas as pd\n",
|
| 11 |
+
"import numpy as np"
|
| 12 |
+
]
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"execution_count": 11,
|
| 17 |
+
"id": "a2d59d9a-5855-4a21-9988-3ea5dd2bb43c",
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [],
|
| 20 |
+
"source": [
|
| 21 |
+
"reviews_df = pd.read_csv(\"Coursera_reviews.csv\")\n",
|
| 22 |
+
"courses_df = pd.read_csv(\"Coursera_courses.csv\")"
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"cell_type": "code",
|
| 27 |
+
"execution_count": 12,
|
| 28 |
+
"id": "7af404ff-07f8-489c-b350-263cb33bb277",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"outputs": [
|
| 31 |
+
{
|
| 32 |
+
"data": {
|
| 33 |
+
"text/html": [
|
| 34 |
+
"<div>\n",
|
| 35 |
+
"<style scoped>\n",
|
| 36 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 37 |
+
" vertical-align: middle;\n",
|
| 38 |
+
" }\n",
|
| 39 |
+
"\n",
|
| 40 |
+
" .dataframe tbody tr th {\n",
|
| 41 |
+
" vertical-align: top;\n",
|
| 42 |
+
" }\n",
|
| 43 |
+
"\n",
|
| 44 |
+
" .dataframe thead th {\n",
|
| 45 |
+
" text-align: right;\n",
|
| 46 |
+
" }\n",
|
| 47 |
+
"</style>\n",
|
| 48 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 49 |
+
" <thead>\n",
|
| 50 |
+
" <tr style=\"text-align: right;\">\n",
|
| 51 |
+
" <th></th>\n",
|
| 52 |
+
" <th>reviews</th>\n",
|
| 53 |
+
" <th>reviewers</th>\n",
|
| 54 |
+
" <th>date_reviews</th>\n",
|
| 55 |
+
" <th>rating</th>\n",
|
| 56 |
+
" <th>course_id</th>\n",
|
| 57 |
+
" </tr>\n",
|
| 58 |
+
" </thead>\n",
|
| 59 |
+
" <tbody>\n",
|
| 60 |
+
" <tr>\n",
|
| 61 |
+
" <th>0</th>\n",
|
| 62 |
+
" <td>Pretty dry, but I was able to pass with just t...</td>\n",
|
| 63 |
+
" <td>By Robert S</td>\n",
|
| 64 |
+
" <td>Feb 12, 2020</td>\n",
|
| 65 |
+
" <td>4</td>\n",
|
| 66 |
+
" <td>google-cbrs-cpi-training</td>\n",
|
| 67 |
+
" </tr>\n",
|
| 68 |
+
" <tr>\n",
|
| 69 |
+
" <th>1</th>\n",
|
| 70 |
+
" <td>would be a better experience if the video and ...</td>\n",
|
| 71 |
+
" <td>By Gabriel E R</td>\n",
|
| 72 |
+
" <td>Sep 28, 2020</td>\n",
|
| 73 |
+
" <td>4</td>\n",
|
| 74 |
+
" <td>google-cbrs-cpi-training</td>\n",
|
| 75 |
+
" </tr>\n",
|
| 76 |
+
" <tr>\n",
|
| 77 |
+
" <th>2</th>\n",
|
| 78 |
+
" <td>Information was perfect! The program itself wa...</td>\n",
|
| 79 |
+
" <td>By Jacob D</td>\n",
|
| 80 |
+
" <td>Apr 08, 2020</td>\n",
|
| 81 |
+
" <td>4</td>\n",
|
| 82 |
+
" <td>google-cbrs-cpi-training</td>\n",
|
| 83 |
+
" </tr>\n",
|
| 84 |
+
" <tr>\n",
|
| 85 |
+
" <th>3</th>\n",
|
| 86 |
+
" <td>A few grammatical mistakes on test made me do ...</td>\n",
|
| 87 |
+
" <td>By Dale B</td>\n",
|
| 88 |
+
" <td>Feb 24, 2020</td>\n",
|
| 89 |
+
" <td>4</td>\n",
|
| 90 |
+
" <td>google-cbrs-cpi-training</td>\n",
|
| 91 |
+
" </tr>\n",
|
| 92 |
+
" <tr>\n",
|
| 93 |
+
" <th>4</th>\n",
|
| 94 |
+
" <td>Excellent course and the training provided was...</td>\n",
|
| 95 |
+
" <td>By Sean G</td>\n",
|
| 96 |
+
" <td>Jun 18, 2020</td>\n",
|
| 97 |
+
" <td>4</td>\n",
|
| 98 |
+
" <td>google-cbrs-cpi-training</td>\n",
|
| 99 |
+
" </tr>\n",
|
| 100 |
+
" </tbody>\n",
|
| 101 |
+
"</table>\n",
|
| 102 |
+
"</div>"
|
| 103 |
+
],
|
| 104 |
+
"text/plain": [
|
| 105 |
+
" reviews reviewers \\\n",
|
| 106 |
+
"0 Pretty dry, but I was able to pass with just t... By Robert S \n",
|
| 107 |
+
"1 would be a better experience if the video and ... By Gabriel E R \n",
|
| 108 |
+
"2 Information was perfect! The program itself wa... By Jacob D \n",
|
| 109 |
+
"3 A few grammatical mistakes on test made me do ... By Dale B \n",
|
| 110 |
+
"4 Excellent course and the training provided was... By Sean G \n",
|
| 111 |
+
"\n",
|
| 112 |
+
" date_reviews rating course_id \n",
|
| 113 |
+
"0 Feb 12, 2020 4 google-cbrs-cpi-training \n",
|
| 114 |
+
"1 Sep 28, 2020 4 google-cbrs-cpi-training \n",
|
| 115 |
+
"2 Apr 08, 2020 4 google-cbrs-cpi-training \n",
|
| 116 |
+
"3 Feb 24, 2020 4 google-cbrs-cpi-training \n",
|
| 117 |
+
"4 Jun 18, 2020 4 google-cbrs-cpi-training "
|
| 118 |
+
]
|
| 119 |
+
},
|
| 120 |
+
"execution_count": 12,
|
| 121 |
+
"metadata": {},
|
| 122 |
+
"output_type": "execute_result"
|
| 123 |
+
}
|
| 124 |
+
],
|
| 125 |
+
"source": [
|
| 126 |
+
"reviews_df.head()"
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"cell_type": "code",
|
| 131 |
+
"execution_count": 13,
|
| 132 |
+
"id": "8e1bef72-cba2-4431-b111-03bc0c872ee0",
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [
|
| 135 |
+
{
|
| 136 |
+
"data": {
|
| 137 |
+
"text/html": [
|
| 138 |
+
"<div>\n",
|
| 139 |
+
"<style scoped>\n",
|
| 140 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 141 |
+
" vertical-align: middle;\n",
|
| 142 |
+
" }\n",
|
| 143 |
+
"\n",
|
| 144 |
+
" .dataframe tbody tr th {\n",
|
| 145 |
+
" vertical-align: top;\n",
|
| 146 |
+
" }\n",
|
| 147 |
+
"\n",
|
| 148 |
+
" .dataframe thead th {\n",
|
| 149 |
+
" text-align: right;\n",
|
| 150 |
+
" }\n",
|
| 151 |
+
"</style>\n",
|
| 152 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 153 |
+
" <thead>\n",
|
| 154 |
+
" <tr style=\"text-align: right;\">\n",
|
| 155 |
+
" <th></th>\n",
|
| 156 |
+
" <th>name</th>\n",
|
| 157 |
+
" <th>institution</th>\n",
|
| 158 |
+
" <th>course_url</th>\n",
|
| 159 |
+
" <th>course_id</th>\n",
|
| 160 |
+
" </tr>\n",
|
| 161 |
+
" </thead>\n",
|
| 162 |
+
" <tbody>\n",
|
| 163 |
+
" <tr>\n",
|
| 164 |
+
" <th>0</th>\n",
|
| 165 |
+
" <td>Machine Learning</td>\n",
|
| 166 |
+
" <td>Stanford University</td>\n",
|
| 167 |
+
" <td>https://www.coursera.org/learn/machine-learning</td>\n",
|
| 168 |
+
" <td>machine-learning</td>\n",
|
| 169 |
+
" </tr>\n",
|
| 170 |
+
" <tr>\n",
|
| 171 |
+
" <th>1</th>\n",
|
| 172 |
+
" <td>Indigenous Canada</td>\n",
|
| 173 |
+
" <td>University of Alberta</td>\n",
|
| 174 |
+
" <td>https://www.coursera.org/learn/indigenous-canada</td>\n",
|
| 175 |
+
" <td>indigenous-canada</td>\n",
|
| 176 |
+
" </tr>\n",
|
| 177 |
+
" <tr>\n",
|
| 178 |
+
" <th>2</th>\n",
|
| 179 |
+
" <td>The Science of Well-Being</td>\n",
|
| 180 |
+
" <td>Yale University</td>\n",
|
| 181 |
+
" <td>https://www.coursera.org/learn/the-science-of-...</td>\n",
|
| 182 |
+
" <td>the-science-of-well-being</td>\n",
|
| 183 |
+
" </tr>\n",
|
| 184 |
+
" <tr>\n",
|
| 185 |
+
" <th>3</th>\n",
|
| 186 |
+
" <td>Technical Support Fundamentals</td>\n",
|
| 187 |
+
" <td>Google</td>\n",
|
| 188 |
+
" <td>https://www.coursera.org/learn/technical-suppo...</td>\n",
|
| 189 |
+
" <td>technical-support-fundamentals</td>\n",
|
| 190 |
+
" </tr>\n",
|
| 191 |
+
" <tr>\n",
|
| 192 |
+
" <th>4</th>\n",
|
| 193 |
+
" <td>Become a CBRS Certified Professional Installer...</td>\n",
|
| 194 |
+
" <td>Google - Spectrum Sharing</td>\n",
|
| 195 |
+
" <td>https://www.coursera.org/learn/google-cbrs-cpi...</td>\n",
|
| 196 |
+
" <td>google-cbrs-cpi-training</td>\n",
|
| 197 |
+
" </tr>\n",
|
| 198 |
+
" </tbody>\n",
|
| 199 |
+
"</table>\n",
|
| 200 |
+
"</div>"
|
| 201 |
+
],
|
| 202 |
+
"text/plain": [
|
| 203 |
+
" name \\\n",
|
| 204 |
+
"0 Machine Learning \n",
|
| 205 |
+
"1 Indigenous Canada \n",
|
| 206 |
+
"2 The Science of Well-Being \n",
|
| 207 |
+
"3 Technical Support Fundamentals \n",
|
| 208 |
+
"4 Become a CBRS Certified Professional Installer... \n",
|
| 209 |
+
"\n",
|
| 210 |
+
" institution \\\n",
|
| 211 |
+
"0 Stanford University \n",
|
| 212 |
+
"1 University of Alberta \n",
|
| 213 |
+
"2 Yale University \n",
|
| 214 |
+
"3 Google \n",
|
| 215 |
+
"4 Google - Spectrum Sharing \n",
|
| 216 |
+
"\n",
|
| 217 |
+
" course_url \\\n",
|
| 218 |
+
"0 https://www.coursera.org/learn/machine-learning \n",
|
| 219 |
+
"1 https://www.coursera.org/learn/indigenous-canada \n",
|
| 220 |
+
"2 https://www.coursera.org/learn/the-science-of-... \n",
|
| 221 |
+
"3 https://www.coursera.org/learn/technical-suppo... \n",
|
| 222 |
+
"4 https://www.coursera.org/learn/google-cbrs-cpi... \n",
|
| 223 |
+
"\n",
|
| 224 |
+
" course_id \n",
|
| 225 |
+
"0 machine-learning \n",
|
| 226 |
+
"1 indigenous-canada \n",
|
| 227 |
+
"2 the-science-of-well-being \n",
|
| 228 |
+
"3 technical-support-fundamentals \n",
|
| 229 |
+
"4 google-cbrs-cpi-training "
|
| 230 |
+
]
|
| 231 |
+
},
|
| 232 |
+
"execution_count": 13,
|
| 233 |
+
"metadata": {},
|
| 234 |
+
"output_type": "execute_result"
|
| 235 |
+
}
|
| 236 |
+
],
|
| 237 |
+
"source": [
|
| 238 |
+
"courses_df.head()"
|
| 239 |
+
]
|
| 240 |
+
},
|
| 241 |
+
{
|
| 242 |
+
"cell_type": "code",
|
| 243 |
+
"execution_count": 1,
|
| 244 |
+
"id": "0ba0e446-f8ac-4949-868f-2d70e282f25e",
|
| 245 |
+
"metadata": {},
|
| 246 |
+
"outputs": [
|
| 247 |
+
{
|
| 248 |
+
"data": {
|
| 249 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 250 |
+
"model_id": "0984692eeaa447a6a9dd70435c72e55d",
|
| 251 |
+
"version_major": 2,
|
| 252 |
+
"version_minor": 0
|
| 253 |
+
},
|
| 254 |
+
"text/plain": [
|
| 255 |
+
"config.json: 0%| | 0.00/483 [00:00<?, ?B/s]"
|
| 256 |
+
]
|
| 257 |
+
},
|
| 258 |
+
"metadata": {},
|
| 259 |
+
"output_type": "display_data"
|
| 260 |
+
},
|
| 261 |
+
{
|
| 262 |
+
"data": {
|
| 263 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 264 |
+
"model_id": "5bf297ecaf37442eb4b01f6a8ac2b69f",
|
| 265 |
+
"version_major": 2,
|
| 266 |
+
"version_minor": 0
|
| 267 |
+
},
|
| 268 |
+
"text/plain": [
|
| 269 |
+
"tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]"
|
| 270 |
+
]
|
| 271 |
+
},
|
| 272 |
+
"metadata": {},
|
| 273 |
+
"output_type": "display_data"
|
| 274 |
+
},
|
| 275 |
+
{
|
| 276 |
+
"data": {
|
| 277 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 278 |
+
"model_id": "6445393b608c4822bd90bdf2f1692a0e",
|
| 279 |
+
"version_major": 2,
|
| 280 |
+
"version_minor": 0
|
| 281 |
+
},
|
| 282 |
+
"text/plain": [
|
| 283 |
+
"vocab.txt: 0.00B [00:00, ?B/s]"
|
| 284 |
+
]
|
| 285 |
+
},
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"output_type": "display_data"
|
| 288 |
+
},
|
| 289 |
+
{
|
| 290 |
+
"data": {
|
| 291 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 292 |
+
"model_id": "09f22b8c6976416d96c5af9bef5e25e4",
|
| 293 |
+
"version_major": 2,
|
| 294 |
+
"version_minor": 0
|
| 295 |
+
},
|
| 296 |
+
"text/plain": [
|
| 297 |
+
"tokenizer.json: 0.00B [00:00, ?B/s]"
|
| 298 |
+
]
|
| 299 |
+
},
|
| 300 |
+
"metadata": {},
|
| 301 |
+
"output_type": "display_data"
|
| 302 |
+
},
|
| 303 |
+
{
|
| 304 |
+
"data": {
|
| 305 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 306 |
+
"model_id": "6049ddbbb2e94125b20c61d3d9ab5cb4",
|
| 307 |
+
"version_major": 2,
|
| 308 |
+
"version_minor": 0
|
| 309 |
+
},
|
| 310 |
+
"text/plain": [
|
| 311 |
+
"model.safetensors: 0%| | 0.00/268M [00:00<?, ?B/s]"
|
| 312 |
+
]
|
| 313 |
+
},
|
| 314 |
+
"metadata": {},
|
| 315 |
+
"output_type": "display_data"
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"data": {
|
| 319 |
+
"application/vnd.jupyter.widget-view+json": {
|
| 320 |
+
"model_id": "3a7f44f7809e41fd86309b76ee57f0dc",
|
| 321 |
+
"version_major": 2,
|
| 322 |
+
"version_minor": 0
|
| 323 |
+
},
|
| 324 |
+
"text/plain": [
|
| 325 |
+
"pytorch_model.bin: 0%| | 0.00/268M [00:00<?, ?B/s]"
|
| 326 |
+
]
|
| 327 |
+
},
|
| 328 |
+
"metadata": {},
|
| 329 |
+
"output_type": "display_data"
|
| 330 |
+
},
|
| 331 |
+
{
|
| 332 |
+
"ename": "OSError",
|
| 333 |
+
"evalue": "distilbert/distilbert-base-uncased does not appear to have a file named pytorch_model.bin or model.safetensors.",
|
| 334 |
+
"output_type": "error",
|
| 335 |
+
"traceback": [
|
| 336 |
+
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 337 |
+
"\u001b[31mOSError\u001b[39m Traceback (most recent call last)",
|
| 338 |
+
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtransformers\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForMaskedLM\n\u001b[32m 4\u001b[39m tokenizer = AutoTokenizer.from_pretrained(\u001b[33m\"\u001b[39m\u001b[33mdistilbert/distilbert-base-uncased\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m model = \u001b[43mAutoModelForMaskedLM\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mdistilbert/distilbert-base-uncased\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
| 339 |
+
"\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:372\u001b[39m, in \u001b[36m_BaseAutoModelClass.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[39m\n\u001b[32m 370\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m model_class.config_class == config.sub_configs.get(\u001b[33m\"\u001b[39m\u001b[33mtext_config\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m 371\u001b[39m config = config.get_text_config()\n\u001b[32m--> \u001b[39m\u001b[32m372\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 373\u001b[39m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 374\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 375\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 376\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig.\u001b[34m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m.\u001b[34m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 377\u001b[39m 
\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m.join(c.\u001b[34m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m._model_mapping)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 378\u001b[39m )\n",
|
| 340 |
+
"\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\modeling_utils.py:4038\u001b[39m, in \u001b[36mPreTrainedModel.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[39m\n\u001b[32m 4033\u001b[39m logger.warning_once(\n\u001b[32m 4034\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mA kernel_config was provided but use_kernels is False; setting use_kernels=True automatically. To suppress this warning, explicitly set use_kernels to True.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 4035\u001b[39m )\n\u001b[32m 4036\u001b[39m use_kernels = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m4038\u001b[39m checkpoint_files, sharded_metadata = \u001b[43m_get_resolved_checkpoint_files\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 4039\u001b[39m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4040\u001b[39m \u001b[43m \u001b[49m\u001b[43mvariant\u001b[49m\u001b[43m=\u001b[49m\u001b[43mvariant\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4041\u001b[39m \u001b[43m \u001b[49m\u001b[43mgguf_file\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgguf_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4042\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4043\u001b[39m \u001b[43m \u001b[49m\u001b[43mdownload_kwargs\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdownload_kwargs_with_commit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4044\u001b[39m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[43m=\u001b[49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4045\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mis_remote_code\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_auto_class\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 4046\u001b[39m \u001b[43m \u001b[49m\u001b[43mtransformers_explicit_filename\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtransformers_weights\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4047\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4049\u001b[39m is_quantized = hf_quantizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 4051\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m gguf_file:\n",
|
| 341 |
+
"\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\modeling_utils.py:710\u001b[39m, in \u001b[36m_get_resolved_checkpoint_files\u001b[39m\u001b[34m(pretrained_model_name_or_path, variant, gguf_file, use_safetensors, user_agent, is_remote_code, transformers_explicit_filename, download_kwargs)\u001b[39m\n\u001b[32m 704\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\n\u001b[32m 705\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not appear to have a file named\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 706\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m but there is a file without the variant\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 707\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvariant\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. 
Use `variant=None` to load this model from those weights.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 708\u001b[39m )\n\u001b[32m 709\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m710\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\n\u001b[32m 711\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not appear to have a file named\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 712\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m or \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(SAFE_WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 713\u001b[39m )\n\u001b[32m 715\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m:\n\u001b[32m 716\u001b[39m \u001b[38;5;66;03m# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted\u001b[39;00m\n\u001b[32m 717\u001b[39m \u001b[38;5;66;03m# to the original exception.\u001b[39;00m\n\u001b[32m 718\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m\n",
|
| 342 |
+
"\u001b[31mOSError\u001b[39m: distilbert/distilbert-base-uncased does not appear to have a file named pytorch_model.bin or model.safetensors."
|
| 343 |
+
]
|
| 344 |
+
}
|
| 345 |
+
],
|
| 346 |
+
"source": [
|
| 347 |
+
"# Load model directly\n",
|
| 348 |
+
"from transformers import AutoTokenizer, AutoModelForMaskedLM\n",
|
| 349 |
+
"\n",
|
| 350 |
+
"tokenizer = AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")\n",
|
| 351 |
+
"model = AutoModelForMaskedLM.from_pretrained(\"distilbert/distilbert-base-uncased\")"
|
| 352 |
+
]
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"cell_type": "code",
|
| 356 |
+
"execution_count": 2,
|
| 357 |
+
"id": "266c5278-0eb4-4daf-b18f-98d9d426ed70",
|
| 358 |
+
"metadata": {},
|
| 359 |
+
"outputs": [],
|
| 360 |
+
"source": [
|
| 361 |
+
"import transformers"
|
| 362 |
+
]
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"cell_type": "code",
|
| 366 |
+
"execution_count": 3,
|
| 367 |
+
"id": "f687230b-a74c-4b63-92f9-f6bc378feecc",
|
| 368 |
+
"metadata": {},
|
| 369 |
+
"outputs": [
|
| 370 |
+
{
|
| 371 |
+
"data": {
|
| 372 |
+
"text/plain": [
|
| 373 |
+
"\u001b[31mType:\u001b[39m _LazyModule\n",
|
| 374 |
+
"\u001b[31mString form:\u001b[39m <module 'transformers' from 'C:\\\\Users\\\\PC\\\\Documents\\\\students\\\\course_feedback_nlp\\\\pytorch\\\\Lib\\\\site-packages\\\\transformers\\\\__init__.py'>\n",
|
| 375 |
+
"\u001b[31mFile:\u001b[39m c:\\users\\pc\\documents\\students\\course_feedback_nlp\\pytorch\\lib\\site-packages\\transformers\\__init__.py\n",
|
| 376 |
+
"\u001b[31mDocstring:\u001b[39m <no docstring>\n",
|
| 377 |
+
"\u001b[31mClass docstring:\u001b[39m Module class that surfaces all objects but only performs associated imports when the objects are requested."
|
| 378 |
+
]
|
| 379 |
+
},
|
| 380 |
+
"metadata": {},
|
| 381 |
+
"output_type": "display_data"
|
| 382 |
+
}
|
| 383 |
+
],
|
| 384 |
+
"source": [
|
| 385 |
+
"transformers?"
|
| 386 |
+
]
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"cell_type": "code",
|
| 390 |
+
"execution_count": null,
|
| 391 |
+
"id": "bf2d9082-1d36-4fde-ad24-8e7599c65acc",
|
| 392 |
+
"metadata": {},
|
| 393 |
+
"outputs": [],
|
| 394 |
+
"source": []
|
| 395 |
+
}
|
| 396 |
+
],
|
| 397 |
+
"metadata": {
|
| 398 |
+
"kernelspec": {
|
| 399 |
+
"display_name": "pytorch",
|
| 400 |
+
"language": "python",
|
| 401 |
+
"name": "pytorch"
|
| 402 |
+
},
|
| 403 |
+
"language_info": {
|
| 404 |
+
"codemirror_mode": {
|
| 405 |
+
"name": "ipython",
|
| 406 |
+
"version": 3
|
| 407 |
+
},
|
| 408 |
+
"file_extension": ".py",
|
| 409 |
+
"mimetype": "text/x-python",
|
| 410 |
+
"name": "python",
|
| 411 |
+
"nbconvert_exporter": "python",
|
| 412 |
+
"pygments_lexer": "ipython3",
|
| 413 |
+
"version": "3.12.7"
|
| 414 |
+
}
|
| 415 |
+
},
|
| 416 |
+
"nbformat": 4,
|
| 417 |
+
"nbformat_minor": 5
|
| 418 |
+
}
|
course_feedback_nlp/evaluate.py
ADDED
|
@@ -0,0 +1,548 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Student Feedback Sentiment Model - Evaluation Script
|
| 3 |
+
====================================================
|
| 4 |
+
Run this after training to complete:
|
| 5 |
+
- Test evaluation
|
| 6 |
+
- Generate plots
|
| 7 |
+
- Save results
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import torch.nn.functional as F
|
| 12 |
+
from torch.utils.data import DataLoader, TensorDataset
|
| 13 |
+
import pandas as pd
|
| 14 |
+
import numpy as np
|
| 15 |
+
from sklearn.model_selection import train_test_split
|
| 16 |
+
from sklearn.metrics import classification_report, confusion_matrix
|
| 17 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
|
| 18 |
+
from tqdm.auto import tqdm
|
| 19 |
+
import matplotlib.pyplot as plt
|
| 20 |
+
import seaborn as sns
|
| 21 |
+
import os
|
| 22 |
+
import json
|
| 23 |
+
import gc
|
| 24 |
+
import warnings
|
| 25 |
+
warnings.filterwarnings('ignore')
|
| 26 |
+
|
| 27 |
+
# ============================================================
|
| 28 |
+
# CONFIGURATION (must match training!)
|
| 29 |
+
# ============================================================
|
| 30 |
+
|
| 31 |
+
# Settings read by main(); the split and label settings have to agree with the
# training run for the evaluation to be meaningful.
CONFIG = {
    'data_path': 'Coursera_reviews.csv',  # CSV read with pandas; uses the 'reviews' and 'rating' columns
    'base_model': './distilbert-base-uncased',  # local directory the base AutoConfig is loaded from
    'output_dir': 'teacher_sentiment_model',  # fine-tuned model/tokenizer directory; results are written here too
    'num_classes': 3,  # written to config.num_labels
    'class_names': ['Negative', 'Neutral', 'Positive'],  # list index == class id
    # Maps the zero-based star label (rating - 1) onto the 3-class label.
    'class_mapping': {
        0: 0,  # 1-star → Negative
        1: 0,  # 2-star → Negative
        2: 1,  # 3-star → Neutral
        3: 2,  # 4-star → Positive
        4: 2,  # 5-star → Positive
    },
    'max_length': 96,  # tokenizer truncation/padding length
    'batch_size': 128,  # DataLoader batch size used for inference
    'test_size': 0.1,  # fraction passed to train_test_split
    'seed': 42,  # passed to set_seed() and used as the split's random_state
    'num_workers': 4,  # DataLoader worker processes
    'use_amp': True,  # wrap the forward pass in torch.amp.autocast
}
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def set_seed(seed):
    """Seed every random number generator this script can touch.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU generator plus all CUDA devices) with the same value.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Imported locally so the file-level import block stays untouched.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without a GPU: PyTorch ignores it when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def tokenize_batch(texts, tokenizer, max_length, desc="Tokenizing", batch_size=10000):
    """Tokenize *texts* in chunks and return stacked id/mask tensors.

    Args:
        texts: Sliceable sequence of strings (NumPy array, pandas Series or
            plain list).
        tokenizer: Callable tokenizer that accepts a list of strings with
            ``truncation``, ``padding``, ``max_length`` and ``return_tensors``
            keywords and returns a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors.
        max_length: Length every sequence is truncated/padded to.
        desc: Label shown on the progress bar.
        batch_size: Number of texts tokenized per chunk.

    Returns:
        Tuple ``(input_ids, attention_mask)``, each the per-chunk tensors
        concatenated along dimension 0. For empty input, two empty
        ``(0, max_length)`` long tensors.
    """
    # torch.cat() rejects an empty list, so handle "nothing to tokenize" up front.
    if len(texts) == 0:
        empty = torch.zeros((0, max_length), dtype=torch.long)
        return empty, empty.clone()

    all_input_ids = []
    all_attention_masks = []

    for start in tqdm(range(0, len(texts), batch_size), desc=desc):
        chunk = texts[start:start + batch_size]
        # Arrays/Series expose .tolist(); plain sequences are copied with list().
        batch_texts = chunk.tolist() if hasattr(chunk, 'tolist') else list(chunk)
        encodings = tokenizer(
            batch_texts,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt',
        )
        all_input_ids.append(encodings['input_ids'])
        all_attention_masks.append(encodings['attention_mask'])

    return torch.cat(all_input_ids, dim=0), torch.cat(all_attention_masks, dim=0)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def main():
    """Evaluate the fine-tuned 3-class sentiment model on the held-out test split.

    Steps, in order:
      1. Rewrite ``config.json`` in ``CONFIG['output_dir']`` from the base
         model's config plus the 3-class label maps, and save
         ``training_config.json`` next to it.
      2. Load and clean the review CSV, map star ratings to three classes and
         take a stratified test split.
      3. Tokenize the test texts and run batched inference.
      4. Print the classification report and the teacher-focused metrics.
      5. Save four plots under ``plots/`` and ``results.json`` / ``results.pt``
         under ``CONFIG['output_dir']``.

    All settings come from the module-level ``CONFIG``. Returns ``None``.
    """
    set_seed(CONFIG['seed'])
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    on_cuda = device.type == 'cuda'
    # Mixed precision and pinned memory only help (and only apply) on CUDA.
    use_amp = bool(CONFIG['use_amp']) and on_cuda
    class_names = CONFIG['class_names']
    # Explicit label ids keep reports/matrices 3-wide even if a class is absent.
    label_ids = list(range(len(class_names)))

    print("=" * 70)
    print("STUDENT FEEDBACK SENTIMENT MODEL - EVALUATION")
    print("=" * 70)
    print(f"Device: {device}")
    if on_cuda:
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    print()

    os.makedirs('plots', exist_ok=True)

    # ============================================================
    # FIX CONFIG.JSON (the bug from training)
    # ============================================================

    print("FIXING MODEL CONFIG")
    print("-" * 70)

    # Load original config from base model
    original_config = AutoConfig.from_pretrained(
        CONFIG['base_model'],
        local_files_only=True
    )

    # Update for our task
    original_config.num_labels = CONFIG['num_classes']
    original_config.id2label = {i: name for i, name in enumerate(class_names)}
    original_config.label2id = {name: i for i, name in enumerate(class_names)}

    # Save corrected config (overwrites config.json in the output directory)
    original_config.save_pretrained(CONFIG['output_dir'])
    print(f" ✓ Fixed config.json in {CONFIG['output_dir']}/")

    # Save our training config separately
    training_config = {
        'num_classes': CONFIG['num_classes'],
        'class_names': CONFIG['class_names'],
        'class_mapping': CONFIG['class_mapping'],
        'max_length': CONFIG['max_length'],
    }
    with open(os.path.join(CONFIG['output_dir'], 'training_config.json'), 'w', encoding='utf-8') as f:
        json.dump(training_config, f, indent=2)
    print(f" ✓ Saved training_config.json")
    print()

    # ============================================================
    # LOAD DATA (only need test set)
    # ============================================================

    print("LOADING DATA")
    print("-" * 70)

    df = pd.read_csv(CONFIG['data_path'])
    print(f"Raw data: {len(df):,} samples")

    # Clean: drop missing/blank reviews and ratings outside 1..5
    df = df.dropna(subset=['reviews', 'rating'])
    df = df[df['reviews'].str.strip() != '']
    df['rating'] = df['rating'].astype(int)
    df = df[df['rating'].between(1, 5)]

    # Map to 3 classes
    df['label_5class'] = df['rating'] - 1
    df['label'] = df['label_5class'].map(CONFIG['class_mapping'])

    print(f"Cleaned data: {len(df):,} samples")

    # Get test split (same as training!)
    # NOTE(review): this only reproduces the training test set if training used
    # the identical cleaning, test_size, seed and stratification - confirm.
    _, X_test, _, y_test = train_test_split(
        df['reviews'].values, df['label'].values,
        test_size=CONFIG['test_size'],
        random_state=CONFIG['seed'],
        stratify=df['label'].values
    )

    print(f"Test samples: {len(X_test):,}")
    print()

    del df
    gc.collect()

    # ============================================================
    # TOKENIZE TEST DATA
    # ============================================================

    print("TOKENIZATION")
    print("-" * 70)

    tokenizer = AutoTokenizer.from_pretrained(CONFIG['output_dir'], local_files_only=True)

    test_ids, test_masks = tokenize_batch(X_test, tokenizer, CONFIG['max_length'], "Test")
    test_labels = torch.tensor(y_test, dtype=torch.long)

    test_dataset = TensorDataset(test_ids, test_masks, test_labels)
    test_loader = DataLoader(
        test_dataset,
        batch_size=CONFIG['batch_size'],
        shuffle=False,
        num_workers=CONFIG['num_workers'],
        pin_memory=on_cuda
    )

    print(f"Test batches: {len(test_loader):,}")
    print()

    del X_test, y_test
    gc.collect()

    # ============================================================
    # LOAD MODEL
    # ============================================================

    print("LOADING MODEL")
    print("-" * 70)

    model = AutoModelForSequenceClassification.from_pretrained(
        CONFIG['output_dir'],
        local_files_only=True
    )
    model = model.to(device)
    model.eval()

    print(f" ✓ Model loaded from {CONFIG['output_dir']}/")
    print(f" ✓ Num labels: {model.config.num_labels}")
    print()

    # ============================================================
    # RUN TEST EVALUATION
    # ============================================================

    print("=" * 70)
    print("FINAL TEST EVALUATION")
    print("=" * 70)

    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for input_ids, attention_mask, labels in tqdm(test_loader, desc="Testing", ncols=100):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            # enabled=False makes this a no-op, so one code path serves both modes.
            with torch.amp.autocast(device_type=device.type, enabled=use_amp):
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            # Under autocast the logits may be half precision; compute the
            # probabilities in float32 so the confidence values are not rounded.
            logits = outputs.logits.float()
            probs = F.softmax(logits, dim=-1)
            _, preds = logits.max(1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())
            all_probs.extend(probs.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    all_probs = np.array(all_probs)

    test_acc = 100 * (all_preds == all_labels).mean()

    print()
    print(f"Test Accuracy: {test_acc:.2f}%")
    print()

    # ============================================================
    # CLASSIFICATION REPORT
    # ============================================================

    print("CLASSIFICATION REPORT")
    print("-" * 70)

    # labels= pins the report to all three classes; zero_division=0 reports 0
    # (instead of warning) for a class that was never predicted or never present.
    report = classification_report(
        all_labels, all_preds,
        labels=label_ids,
        target_names=class_names,
        digits=3,
        output_dict=True,
        zero_division=0
    )

    print(classification_report(
        all_labels, all_preds,
        labels=label_ids,
        target_names=class_names,
        digits=3,
        zero_division=0
    ))

    # ============================================================
    # TEACHER-FOCUSED METRICS
    # ============================================================

    print()
    print("=" * 70)
    print("📊 TEACHER-FOCUSED METRICS")
    print("=" * 70)
    print()

    # Negative class recall
    negative_recall = report['Negative']['recall'] * 100
    negative_precision = report['Negative']['precision'] * 100
    negative_f1 = report['Negative']['f1-score'] * 100

    print(f" 🔴 NEGATIVE FEEDBACK DETECTION (Struggling Students):")
    print(f" Recall: {negative_recall:.1f}% ← Catches {negative_recall:.0f}% of struggling students")
    print(f" Precision: {negative_precision:.1f}% ← {negative_precision:.0f}% of flags are real issues")
    print(f" F1-Score: {negative_f1:.1f}%")
    print()

    # False negative analysis: true Negative (class 0) predicted as anything else
    fn_mask = (all_labels == 0) & (all_preds != 0)
    false_negatives = fn_mask.sum()
    total_negatives = (all_labels == 0).sum()
    missed_pct = 100 * false_negatives / total_negatives if total_negatives > 0 else 0

    print(f" ⚠️ MISSED STRUGGLING STUDENTS:")
    print(f" {false_negatives:,} of {total_negatives:,} negative cases missed ({missed_pct:.1f}%)")
    print()

    # Where did false negatives go?
    if false_negatives > 0:
        fn_preds = all_preds[fn_mask]
        fn_to_neutral = (fn_preds == 1).sum()
        fn_to_positive = (fn_preds == 2).sum()
        print(f" Misclassified as Neutral: {fn_to_neutral:,}")
        print(f" Misclassified as Positive: {fn_to_positive:,}")
        print()

    # Confidence analysis: confidence is the probability of the predicted class
    pred_confidence = all_probs.max(axis=1)
    low_confidence = (pred_confidence < 0.7).sum()
    low_conf_pct = 100 * low_confidence / len(pred_confidence)

    print(f" 🤔 UNCERTAIN PREDICTIONS (confidence < 70%):")
    print(f" {low_confidence:,} of {len(pred_confidence):,} predictions ({low_conf_pct:.1f}%)")
    print(f" → These should be flagged for manual review")
    print()

    # Confidence by class
    print(f" 📈 AVERAGE CONFIDENCE BY PREDICTION:")
    for i, name in enumerate(class_names):
        mask = all_preds == i
        if mask.sum() > 0:
            avg_conf = pred_confidence[mask].mean() * 100
            emoji = ['🔴', '🟡', '🟢'][i]
            print(f" {emoji} {name}: {avg_conf:.1f}%")
    print()

    # ============================================================
    # CONFUSION MATRIX PLOT
    # ============================================================

    print("GENERATING PLOTS")
    print("-" * 70)

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)

    # Counts
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names, ax=axes[0],
                annot_kws={'size': 14})
    axes[0].set_xlabel('Predicted', fontsize=12)
    axes[0].set_ylabel('Actual', fontsize=12)
    axes[0].set_title('Confusion Matrix (Counts)', fontsize=14)

    # Normalized (Recall): divide each row by its total, leaving rows for
    # classes with no true samples at 0 instead of dividing by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_norm = np.divide(
        cm.astype(float), row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0
    )
    sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names, ax=axes[1],
                annot_kws={'size': 14})
    axes[1].set_xlabel('Predicted', fontsize=12)
    axes[1].set_ylabel('Actual', fontsize=12)
    axes[1].set_title('Confusion Matrix (Recall per Class)', fontsize=14)

    plt.tight_layout()
    plt.savefig('plots/confusion_matrix_3class.png', dpi=150, bbox_inches='tight')
    plt.close(fig)  # release the figure once it is on disk
    print(" ✓ Saved: plots/confusion_matrix_3class.png")

    # ============================================================
    # PER-CLASS METRICS PLOT
    # ============================================================

    fig, ax = plt.subplots(figsize=(12, 6))
    x = np.arange(len(class_names))
    width = 0.25

    recalls = [report[c]['recall'] * 100 for c in class_names]
    precisions = [report[c]['precision'] * 100 for c in class_names]
    f1s = [report[c]['f1-score'] * 100 for c in class_names]

    bars1 = ax.bar(x - width, recalls, width, label='Recall', color='#e74c3c', edgecolor='black')
    bars2 = ax.bar(x, precisions, width, label='Precision', color='#3498db', edgecolor='black')
    bars3 = ax.bar(x + width, f1s, width, label='F1-Score', color='#2ecc71', edgecolor='black')

    ax.set_ylabel('Score (%)', fontsize=12)
    ax.set_title('Per-Class Metrics (Teacher Sentiment Model)', fontsize=14)
    ax.set_xticks(x)
    ax.set_xticklabels([
        '🔴 Negative\n(Needs Attention)',
        '🟡 Neutral\n(Mixed/Unclear)',
        '🟢 Positive\n(Satisfied)'
    ], fontsize=11)
    ax.set_ylim(0, 105)
    ax.axhline(y=90, color='green', linestyle='--', alpha=0.5, label='90% target')
    # Build the legend after the target line exists so its label is included.
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3, axis='y')

    # Add value labels
    for bars in [bars1, bars2, bars3]:
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.1f}%',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3),
                        textcoords="offset points",
                        ha='center', va='bottom', fontsize=10, fontweight='bold')

    plt.tight_layout()
    plt.savefig('plots/per_class_metrics_3class.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
    print(" ✓ Saved: plots/per_class_metrics_3class.png")

    # ============================================================
    # CONFIDENCE DISTRIBUTION PLOT
    # ============================================================

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Overall confidence distribution
    axes[0].hist(pred_confidence, bins=50, color='steelblue', edgecolor='black', alpha=0.7)
    axes[0].axvline(x=0.7, color='red', linestyle='--', linewidth=2, label='70% threshold')
    axes[0].set_xlabel('Confidence', fontsize=12)
    axes[0].set_ylabel('Count', fontsize=12)
    axes[0].set_title('Prediction Confidence Distribution', fontsize=14)
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Confidence by class
    colors = ['#e74c3c', '#f39c12', '#27ae60']
    for i, (name, color) in enumerate(zip(class_names, colors)):
        mask = all_preds == i
        if mask.sum() > 0:
            axes[1].hist(pred_confidence[mask], bins=30, alpha=0.5, label=name, color=color)

    axes[1].axvline(x=0.7, color='red', linestyle='--', linewidth=2, label='70% threshold')
    axes[1].set_xlabel('Confidence', fontsize=12)
    axes[1].set_ylabel('Count', fontsize=12)
    axes[1].set_title('Confidence by Predicted Class', fontsize=14)
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('plots/confidence_distribution.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
    print(" ✓ Saved: plots/confidence_distribution.png")

    # ============================================================
    # ERROR ANALYSIS PLOT
    # ============================================================

    fig, ax = plt.subplots(figsize=(10, 6))

    # Calculate error rates (share of each true class that was mispredicted)
    error_rates = []
    for i, name in enumerate(class_names):
        mask = all_labels == i
        errors = (all_preds[mask] != all_labels[mask]).sum()
        total = mask.sum()
        error_rate = 100 * errors / total if total > 0 else 0
        error_rates.append(error_rate)

    colors = ['#e74c3c', '#f39c12', '#27ae60']
    bars = ax.bar(class_names, error_rates, color=colors, edgecolor='black', linewidth=1.5)

    ax.set_ylabel('Error Rate (%)', fontsize=12)
    ax.set_title('Error Rate by True Class', fontsize=14)
    ax.set_ylim(0, max(error_rates) * 1.2 if max(error_rates) > 0 else 10)
    ax.grid(True, alpha=0.3, axis='y')

    for bar, rate in zip(bars, error_rates):
        ax.annotate(f'{rate:.1f}%',
                    xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=12, fontweight='bold')

    plt.tight_layout()
    plt.savefig('plots/error_analysis.png', dpi=150, bbox_inches='tight')
    plt.close(fig)
    print(" ✓ Saved: plots/error_analysis.png")

    # ============================================================
    # SAVE RESULTS
    # ============================================================

    print()
    print("SAVING RESULTS")
    print("-" * 70)

    # Cast NumPy scalars to plain Python numbers so json.dump can serialise them.
    results = {
        'test_accuracy': float(test_acc),
        'negative_recall': float(negative_recall),
        'negative_precision': float(negative_precision),
        'negative_f1': float(negative_f1),
        'neutral_recall': float(report['Neutral']['recall'] * 100),
        'positive_recall': float(report['Positive']['recall'] * 100),
        'missed_struggling_students': int(false_negatives),
        'total_negative_cases': int(total_negatives),
        'missed_percentage': float(missed_pct),
        'low_confidence_predictions': int(low_confidence),
        'low_confidence_percentage': float(low_conf_pct),
        'macro_f1': float(report['macro avg']['f1-score'] * 100),
        'weighted_f1': float(report['weighted avg']['f1-score'] * 100),
    }

    # Save as JSON
    with open(os.path.join(CONFIG['output_dir'], 'results.json'), 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)
    print(f" ✓ Saved: {CONFIG['output_dir']}/results.json")

    # Save full results as PyTorch
    full_results = {
        **results,
        'config': CONFIG,
        'classification_report': report,
        'confusion_matrix': cm.tolist(),
        'all_predictions': all_preds.tolist(),
        'all_labels': all_labels.tolist(),
    }
    torch.save(full_results, os.path.join(CONFIG['output_dir'], 'results.pt'))
    print(f" ✓ Saved: {CONFIG['output_dir']}/results.pt")

    # ============================================================
    # FINAL SUMMARY
    # ============================================================

    print()
    print("=" * 70)
    print("🎉 EVALUATION COMPLETE!")
    print("=" * 70)
    print()
    print(" RESULTS SUMMARY:")
    print(f" Test Accuracy: {test_acc:.2f}%")
    print(f" Macro F1-Score: {report['macro avg']['f1-score']*100:.2f}%")
    print(f" Weighted F1-Score: {report['weighted avg']['f1-score']*100:.2f}%")
    print()
    print(" PER-CLASS RECALL (most important for teachers):")
    print(f" 🔴 Negative: {negative_recall:.1f}% ← Catches {100-missed_pct:.0f}% of struggling students")
    print(f" 🟡 Neutral: {report['Neutral']['recall']*100:.1f}%")
    print(f" 🟢 Positive: {report['Positive']['recall']*100:.1f}%")
    print()
    print(" KEY INSIGHTS:")
    print(f" • {false_negatives:,} struggling students would be missed ({missed_pct:.1f}%)")
    print(f" • {low_confidence:,} predictions need manual review ({low_conf_pct:.1f}%)")
    print()
    print(" FILES SAVED:")
    print(f" • {CONFIG['output_dir']}/results.json")
    print(f" • {CONFIG['output_dir']}/results.pt")
    print(f" • plots/confusion_matrix_3class.png")
    print(f" • plots/per_class_metrics_3class.png")
    print(f" • plots/confidence_distribution.png")
    print(f" • plots/error_analysis.png")
    print()
    print("=" * 70)
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
course_feedback_nlp/requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tqdm
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
| 4 |
+
scikit-learn
|
| 5 |
+
seaborn
|
| 6 |
+
matplotlib
|
| 7 |
+
transformers
|
course_feedback_nlp/test.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# save as predict.py
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 5 |
+
|
| 6 |
+
# Loaded (model, tokenizer) pairs keyed by (model directory, device string),
# so repeated predict() calls reuse one copy instead of re-reading it from disk.
_PREDICTOR_CACHE = {}


def _load_predictor(model_dir, device):
    """Return the cached ``(model, tokenizer)`` pair for ``model_dir`` on ``device``."""
    cache_key = (model_dir, str(device))
    if cache_key not in _PREDICTOR_CACHE:
        model = AutoModelForSequenceClassification.from_pretrained(
            model_dir, local_files_only=True
        ).to(device)
        model.eval()
        tokenizer = AutoTokenizer.from_pretrained(
            model_dir, local_files_only=True
        )
        _PREDICTOR_CACHE[cache_key] = (model, tokenizer)
    return _PREDICTOR_CACHE[cache_key]


def predict(text):
    """Classify one review with the locally saved ``sentiment_model``.

    The model and tokenizer are loaded from the ``sentiment_model`` directory
    (``local_files_only=True``) the first time this is called and reused on
    later calls. The text is truncated/padded to 96 tokens.

    Args:
        text: Review text to score.

    Returns:
        dict with three entries:
            ``rating``     - predicted class index plus one (int).
            ``confidence`` - softmax probability of that class, formatted
                             as a percentage string such as ``'93.4%'``.
            ``all_probs``  - mapping of every rating to its percentage string.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model, tokenizer = _load_predictor('sentiment_model', device)

    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=96,
        padding='max_length'
    ).to(device)

    with torch.no_grad():
        # Mixed precision is only meaningful on the GPU; on a CPU-only host
        # the context is entered disabled instead of requesting 'cuda'.
        with torch.amp.autocast(device.type, enabled=(device.type == 'cuda')):
            outputs = model(**inputs)

    # Work in float32 so half-precision logits do not coarsen the probabilities.
    logits = outputs.logits.float()
    probs = torch.softmax(logits, dim=1)[0].tolist()

    # Shift the 0-based class index to a 1-based rating (1-5 per the original
    # comment; assumes the saved model is the five-label one - TODO confirm).
    pred_class = logits.argmax(dim=1).item() + 1
    confidence = probs[pred_class - 1]

    return {
        'rating': pred_class,
        'confidence': f'{confidence:.1%}',
        'all_probs': {i + 1: f'{p:.1%}' for i, p in enumerate(probs)}
    }
|
| 39 |
+
|
| 40 |
+
if __name__ == '__main__':
    # Smoke test: run the predictor over a few hand-written reviews.
    tests = [
        "This course was amazing! Best I've ever taken!",
        "Terrible waste of time. Very boring.",
        "It was okay, nothing special.",
        "Good course but could be better organized.",
        "Absolutely fantastic! Highly recommend!"
    ]

    preview_len = 50

    for text in tests:
        result = predict(text)
        # Add the ellipsis only when the preview really was cut short;
        # previously it was appended to every text, truncated or not.
        if len(text) > preview_len:
            preview = f"{text[:preview_len]}..."
        else:
            preview = text
        print(f"\n'{preview}'")
        print(f" → Predicted: {result['rating']} stars ({result['confidence']})")
|
course_feedback_nlp/train.py
ADDED
|
@@ -0,0 +1,862 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Course Review Sentiment Model - Training Script
|
| 3 |
+
VRAM Optimized for AMD 7900 XTX (24GB)
|
| 4 |
+
|
| 5 |
+
PATCHES APPLIED:
|
| 6 |
+
- Class weights to handle imbalanced data (78.8% are 5-star reviews)
|
| 7 |
+
- Optimized batch_size=128 for better accuracy
|
| 8 |
+
- max_length=96 for faster training
|
| 9 |
+
- AMD crash protection and emergency checkpointing
|
| 10 |
+
- Periodic checkpoint saving (every epoch)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import torch
|
| 14 |
+
import torch.nn as nn
|
| 15 |
+
from torch.utils.data import Dataset, DataLoader, TensorDataset
|
| 16 |
+
import pandas as pd
|
| 17 |
+
import numpy as np
|
| 18 |
+
from sklearn.model_selection import train_test_split
|
| 19 |
+
from sklearn.metrics import classification_report, confusion_matrix
|
| 20 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 21 |
+
from transformers import get_linear_schedule_with_warmup
|
| 22 |
+
from tqdm.auto import tqdm
|
| 23 |
+
import matplotlib.pyplot as plt
|
| 24 |
+
import seaborn as sns
|
| 25 |
+
import os
|
| 26 |
+
import time
|
| 27 |
+
import gc
|
| 28 |
+
import warnings
|
| 29 |
+
warnings.filterwarnings('ignore')
|
| 30 |
+
|
| 31 |
+
# ============================================================
|
| 32 |
+
# AMD CRASH PROTECTION - Suppress problematic logging
|
| 33 |
+
# ============================================================
|
| 34 |
+
# Set the AMD/ROCm log-level variables to '0' and HIP_VISIBLE_DEVICES to '0'
# for this process.
# NOTE(review): this runs after `import torch` earlier in the module; confirm
# the ROCm runtime still honours values set at this point.
os.environ.update({
    'AMD_LOG_LEVEL': '0',
    'ROCM_LOG_LEVEL': '0',
    'HIP_VISIBLE_DEVICES': '0',
})
|
| 37 |
+
|
| 38 |
+
# ============================================================
|
| 39 |
+
# CONFIGURATION
|
| 40 |
+
# ============================================================
|
| 41 |
+
|
| 42 |
+
# Single source of truth for every tunable used by the training script.
CONFIG = dict(
    # --- Paths ---
    data_path='Coursera_reviews.csv',
    model_name='./distilbert-base-uncased',
    output_dir='sentiment_model',
    checkpoint_dir='checkpoints',      # periodic per-epoch saves go here

    # --- Tokenization / batching ---
    max_length=96,                     # lowered from 128: faster, minimal accuracy loss
    batch_size=128,                    # lowered from 512: better accuracy (Run 3)

    # --- Optimization ---
    epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,

    # --- Data split fractions ---
    train_size=0.8,
    val_size=0.1,
    test_size=0.1,
    seed=42,

    # --- DataLoader ---
    num_workers=4,
    pin_memory=True,

    # --- Feature switches ---
    use_amp=True,                      # mixed precision for speed
    use_class_weights=True,            # counter the class imbalance
    checkpoint_every_epoch=True,       # write a checkpoint after each epoch
)
|
| 63 |
+
|
| 64 |
+
# ============================================================
|
| 65 |
+
# MAIN FUNCTION
|
| 66 |
+
# ============================================================
|
| 67 |
+
|
| 68 |
+
def main():
|
| 69 |
+
# ============================================================
|
| 70 |
+
# SETUP
|
| 71 |
+
# ============================================================
|
| 72 |
+
|
| 73 |
+
def set_seed(seed):
    """Seed the PyTorch (CPU and all CUDA devices) and NumPy RNGs with ``seed``."""
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
|
| 77 |
+
|
| 78 |
+
set_seed(CONFIG['seed'])
|
| 79 |
+
|
| 80 |
+
# Device
|
| 81 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 82 |
+
print("=" * 70)
|
| 83 |
+
print("DEVICE INFORMATION")
|
| 84 |
+
print("=" * 70)
|
| 85 |
+
print(f" Device: {device}")
|
| 86 |
+
if torch.cuda.is_available():
|
| 87 |
+
print(f" GPU: {torch.cuda.get_device_name(0)}")
|
| 88 |
+
total_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
|
| 89 |
+
print(f" Memory: {total_mem:.2f} GB")
|
| 90 |
+
print("=" * 70)
|
| 91 |
+
print()
|
| 92 |
+
|
| 93 |
+
# Create directories early
|
| 94 |
+
os.makedirs(CONFIG['output_dir'], exist_ok=True)
|
| 95 |
+
os.makedirs(CONFIG['checkpoint_dir'], exist_ok=True)
|
| 96 |
+
os.makedirs('plots', exist_ok=True)
|
| 97 |
+
|
| 98 |
+
# ============================================================
|
| 99 |
+
# VERIFY LOCAL MODEL EXISTS
|
| 100 |
+
# ============================================================
|
| 101 |
+
|
| 102 |
+
print("=" * 70)
|
| 103 |
+
print("VERIFYING LOCAL MODEL")
|
| 104 |
+
print("=" * 70)
|
| 105 |
+
|
| 106 |
+
model_path = CONFIG['model_name']
|
| 107 |
+
if os.path.exists(model_path):
|
| 108 |
+
print(f" ✓ Model directory found: {model_path}")
|
| 109 |
+
else:
|
| 110 |
+
print(f" ✗ Model directory NOT found: {model_path}")
|
| 111 |
+
return
|
| 112 |
+
|
| 113 |
+
print("=" * 70)
|
| 114 |
+
print()
|
| 115 |
+
|
| 116 |
+
# ============================================================
|
| 117 |
+
# DATA LOADING
|
| 118 |
+
# ============================================================
|
| 119 |
+
|
| 120 |
+
print("=" * 70)
|
| 121 |
+
print("DATA LOADING")
|
| 122 |
+
print("=" * 70)
|
| 123 |
+
|
| 124 |
+
print("Loading data...")
|
| 125 |
+
df = pd.read_csv(CONFIG['data_path'])
|
| 126 |
+
print(f" Raw data shape: {df.shape}")
|
| 127 |
+
|
| 128 |
+
# Clean data
|
| 129 |
+
df = df.dropna(subset=['reviews', 'rating'])
|
| 130 |
+
df = df[df['reviews'].str.strip() != '']
|
| 131 |
+
df['rating'] = df['rating'].astype(int)
|
| 132 |
+
df = df[df['rating'].between(1, 5)]
|
| 133 |
+
df['label'] = df['rating'] - 1
|
| 134 |
+
|
| 135 |
+
print(f" Cleaned data shape: {df.shape}")
|
| 136 |
+
print(f"\n Rating distribution:")
|
| 137 |
+
for rating, count in df['rating'].value_counts().sort_index().items():
|
| 138 |
+
pct = 100 * count / len(df)
|
| 139 |
+
bar = "█" * int(pct / 2)
|
| 140 |
+
print(f" {rating} Star: {count:>8,} ({pct:>5.1f}%) {bar}")
|
| 141 |
+
|
| 142 |
+
# ============================================================
|
| 143 |
+
# CALCULATE CLASS WEIGHTS (Before deleting df!)
|
| 144 |
+
# ============================================================
|
| 145 |
+
|
| 146 |
+
if CONFIG['use_class_weights']:
|
| 147 |
+
print(f"\n Calculating class weights...")
|
| 148 |
+
class_counts = df['label'].value_counts().sort_index().values
|
| 149 |
+
# Inverse frequency weighting
|
| 150 |
+
class_weights = 1.0 / class_counts
|
| 151 |
+
# Normalize so weights sum to num_classes
|
| 152 |
+
class_weights = class_weights / class_weights.sum() * len(class_counts)
|
| 153 |
+
class_weights = torch.tensor(class_weights, dtype=torch.float32)
|
| 154 |
+
|
| 155 |
+
print(f" Class weights (to balance {class_counts[-1]/class_counts[0]:.1f}x imbalance):")
|
| 156 |
+
for i, (w, c) in enumerate(zip(class_weights, class_counts)):
|
| 157 |
+
print(f" {i+1} Star: weight={w:.4f} (count={c:,})")
|
| 158 |
+
else:
|
| 159 |
+
class_weights = None
|
| 160 |
+
|
| 161 |
+
print("=" * 70)
|
| 162 |
+
print()
|
| 163 |
+
|
| 164 |
+
# ============================================================
|
| 165 |
+
# TRAIN / VALIDATION / TEST SPLIT
|
| 166 |
+
# ============================================================
|
| 167 |
+
|
| 168 |
+
print("=" * 70)
|
| 169 |
+
print("DATA SPLITTING")
|
| 170 |
+
print("=" * 70)
|
| 171 |
+
|
| 172 |
+
X_temp, X_test, y_temp, y_test = train_test_split(
|
| 173 |
+
df['reviews'].values,
|
| 174 |
+
df['label'].values,
|
| 175 |
+
test_size=CONFIG['test_size'],
|
| 176 |
+
random_state=CONFIG['seed'],
|
| 177 |
+
stratify=df['label'].values
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
relative_val_size = CONFIG['val_size'] / (CONFIG['train_size'] + CONFIG['val_size'])
|
| 181 |
+
|
| 182 |
+
X_train, X_val, y_train, y_val = train_test_split(
|
| 183 |
+
X_temp,
|
| 184 |
+
y_temp,
|
| 185 |
+
test_size=relative_val_size,
|
| 186 |
+
random_state=CONFIG['seed'],
|
| 187 |
+
stratify=y_temp
|
| 188 |
+
)
|
| 189 |
+
|
| 190 |
+
print(f" Training samples: {len(X_train):>10,} ({100*len(X_train)/len(df):.1f}%)")
|
| 191 |
+
print(f" Validation samples: {len(X_val):>10,} ({100*len(X_val)/len(df):.1f}%)")
|
| 192 |
+
print(f" Test samples: {len(X_test):>10,} ({100*len(X_test)/len(df):.1f}%)")
|
| 193 |
+
print("=" * 70)
|
| 194 |
+
print()
|
| 195 |
+
|
| 196 |
+
# Now we can delete df
|
| 197 |
+
del df
|
| 198 |
+
gc.collect()
|
| 199 |
+
|
| 200 |
+
# ============================================================
|
| 201 |
+
# TOKENIZER
|
| 202 |
+
# ============================================================
|
| 203 |
+
|
| 204 |
+
print("Loading tokenizer...")
|
| 205 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
| 206 |
+
CONFIG['model_name'],
|
| 207 |
+
local_files_only=True
|
| 208 |
+
)
|
| 209 |
+
print(f" ✓ Tokenizer loaded")
|
| 210 |
+
print()
|
| 211 |
+
|
| 212 |
+
# ============================================================
|
| 213 |
+
# PRE-TOKENIZE ALL DATA (Key optimization!)
|
| 214 |
+
# ============================================================
|
| 215 |
+
|
| 216 |
+
print("=" * 70)
|
| 217 |
+
print("PRE-TOKENIZING ALL DATA")
|
| 218 |
+
print("=" * 70)
|
| 219 |
+
print(" This runs once and stores tensors for fast loading...")
|
| 220 |
+
print()
|
| 221 |
+
|
| 222 |
+
def tokenize_batch(texts, desc="Tokenizing", batch_size=10000):
    """Tokenize ``texts`` in chunks and return stacked tensors.

    Uses the enclosing scope's ``tokenizer`` and ``CONFIG['max_length']``;
    every text is truncated/padded to that length.

    Args:
        texts: Sequence of strings (NumPy array or plain list).
        desc: Label shown on the tqdm progress bar.
        batch_size: Number of texts tokenized per chunk, to bound peak memory.

    Returns:
        ``(input_ids, attention_mask)`` tensors concatenated along dim 0.
        For empty input, two empty ``(0, max_length)`` tensors are returned
        instead of letting ``torch.cat`` fail on an empty list.
    """
    if len(texts) == 0:
        # dtype chosen to match integer token ids - TODO confirm against the
        # tokenizer's own output dtype.
        empty = torch.empty((0, CONFIG['max_length']), dtype=torch.long)
        return empty, empty.clone()

    all_input_ids = []
    all_attention_masks = []

    for i in tqdm(range(0, len(texts), batch_size), desc=desc):
        chunk = texts[i:i + batch_size]
        # NumPy slices expose .tolist(); plain sequences are copied with list().
        batch_texts = chunk.tolist() if hasattr(chunk, 'tolist') else list(chunk)

        encodings = tokenizer(
            batch_texts,
            truncation=True,
            padding='max_length',
            max_length=CONFIG['max_length'],
            return_tensors='pt'
        )

        all_input_ids.append(encodings['input_ids'])
        all_attention_masks.append(encodings['attention_mask'])

    return (
        torch.cat(all_input_ids, dim=0),
        torch.cat(all_attention_masks, dim=0)
    )
|
| 247 |
+
|
| 248 |
+
# Tokenize train
|
| 249 |
+
print(" Tokenizing training data...")
|
| 250 |
+
train_input_ids, train_attention_masks = tokenize_batch(X_train, " Train")
|
| 251 |
+
train_labels = torch.tensor(y_train, dtype=torch.long)
|
| 252 |
+
|
| 253 |
+
# Tokenize validation
|
| 254 |
+
print(" Tokenizing validation data...")
|
| 255 |
+
val_input_ids, val_attention_masks = tokenize_batch(X_val, " Val")
|
| 256 |
+
val_labels = torch.tensor(y_val, dtype=torch.long)
|
| 257 |
+
|
| 258 |
+
# Tokenize test
|
| 259 |
+
print(" Tokenizing test data...")
|
| 260 |
+
test_input_ids, test_attention_masks = tokenize_batch(X_test, " Test")
|
| 261 |
+
test_labels = torch.tensor(y_test, dtype=torch.long)
|
| 262 |
+
|
| 263 |
+
# Free memory
|
| 264 |
+
del X_train, X_val, X_test, y_train, y_val, y_test, X_temp, y_temp
|
| 265 |
+
gc.collect()
|
| 266 |
+
|
| 267 |
+
print()
|
| 268 |
+
print(f" ✓ Train tensors: {train_input_ids.shape}")
|
| 269 |
+
print(f" ✓ Val tensors: {val_input_ids.shape}")
|
| 270 |
+
print(f" ✓ Test tensors: {test_input_ids.shape}")
|
| 271 |
+
print("=" * 70)
|
| 272 |
+
print()
|
| 273 |
+
|
| 274 |
+
# ============================================================
|
| 275 |
+
# CREATE TENSOR DATASETS (Fast!)
|
| 276 |
+
# ============================================================
|
| 277 |
+
|
| 278 |
+
train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
|
| 279 |
+
val_dataset = TensorDataset(val_input_ids, val_attention_masks, val_labels)
|
| 280 |
+
test_dataset = TensorDataset(test_input_ids, test_attention_masks, test_labels)
|
| 281 |
+
|
| 282 |
+
# ============================================================
|
| 283 |
+
# DATALOADERS
|
| 284 |
+
# ============================================================
|
| 285 |
+
|
| 286 |
+
print("Creating dataloaders...")
|
| 287 |
+
|
| 288 |
+
train_loader = DataLoader(
|
| 289 |
+
train_dataset,
|
| 290 |
+
batch_size=CONFIG['batch_size'],
|
| 291 |
+
shuffle=True,
|
| 292 |
+
num_workers=CONFIG['num_workers'],
|
| 293 |
+
pin_memory=CONFIG['pin_memory'],
|
| 294 |
+
persistent_workers=True # NEW: Keep workers alive between epochs
|
| 295 |
+
)
|
| 296 |
+
|
| 297 |
+
val_loader = DataLoader(
|
| 298 |
+
val_dataset,
|
| 299 |
+
batch_size=CONFIG['batch_size'],
|
| 300 |
+
shuffle=False,
|
| 301 |
+
num_workers=CONFIG['num_workers'],
|
| 302 |
+
pin_memory=CONFIG['pin_memory'],
|
| 303 |
+
persistent_workers=True
|
| 304 |
+
)
|
| 305 |
+
|
| 306 |
+
test_loader = DataLoader(
|
| 307 |
+
test_dataset,
|
| 308 |
+
batch_size=CONFIG['batch_size'],
|
| 309 |
+
shuffle=False,
|
| 310 |
+
num_workers=CONFIG['num_workers'],
|
| 311 |
+
pin_memory=CONFIG['pin_memory'],
|
| 312 |
+
persistent_workers=True
|
| 313 |
+
)
|
| 314 |
+
|
| 315 |
+
print(f" ✓ Train batches: {len(train_loader):,}")
|
| 316 |
+
print(f" ✓ Validation batches: {len(val_loader):,}")
|
| 317 |
+
print(f" ✓ Test batches: {len(test_loader):,}")
|
| 318 |
+
print()
|
| 319 |
+
|
| 320 |
+
# ============================================================
|
| 321 |
+
# MODEL
|
| 322 |
+
# ============================================================
|
| 323 |
+
|
| 324 |
+
print("Loading model...")
|
| 325 |
+
model = AutoModelForSequenceClassification.from_pretrained(
|
| 326 |
+
CONFIG['model_name'],
|
| 327 |
+
num_labels=5,
|
| 328 |
+
local_files_only=True
|
| 329 |
+
)
|
| 330 |
+
model = model.to(device)
|
| 331 |
+
|
| 332 |
+
total_params = sum(p.numel() for p in model.parameters())
|
| 333 |
+
print(f" ✓ Model loaded")
|
| 334 |
+
print(f" ✓ Total parameters: {total_params:,}")
|
| 335 |
+
print()
|
| 336 |
+
|
| 337 |
+
# ============================================================
|
| 338 |
+
# LOSS FUNCTION WITH CLASS WEIGHTS
|
| 339 |
+
# ============================================================
|
| 340 |
+
|
| 341 |
+
if CONFIG['use_class_weights'] and class_weights is not None:
|
| 342 |
+
class_weights = class_weights.to(device)
|
| 343 |
+
criterion = nn.CrossEntropyLoss(weight=class_weights)
|
| 344 |
+
print(f" ✓ Using weighted CrossEntropyLoss")
|
| 345 |
+
else:
|
| 346 |
+
criterion = nn.CrossEntropyLoss()
|
| 347 |
+
print(f" ✓ Using standard CrossEntropyLoss")
|
| 348 |
+
print()
|
| 349 |
+
|
| 350 |
+
# ============================================================
|
| 351 |
+
# OPTIMIZER & SCHEDULER
|
| 352 |
+
# ============================================================
|
| 353 |
+
|
| 354 |
+
optimizer = torch.optim.AdamW(
|
| 355 |
+
model.parameters(),
|
| 356 |
+
lr=CONFIG['learning_rate'],
|
| 357 |
+
weight_decay=CONFIG['weight_decay']
|
| 358 |
+
)
|
| 359 |
+
|
| 360 |
+
total_steps = len(train_loader) * CONFIG['epochs']
|
| 361 |
+
warmup_steps = int(total_steps * CONFIG['warmup_ratio'])
|
| 362 |
+
|
| 363 |
+
scheduler = get_linear_schedule_with_warmup(
|
| 364 |
+
optimizer,
|
| 365 |
+
num_warmup_steps=warmup_steps,
|
| 366 |
+
num_training_steps=total_steps
|
| 367 |
+
)
|
| 368 |
+
|
| 369 |
+
# Mixed Precision Scaler (for speed)
|
| 370 |
+
scaler = torch.amp.GradScaler('cuda') if CONFIG['use_amp'] else None
|
| 371 |
+
|
| 372 |
+
print("Optimizer & Scheduler configured:")
|
| 373 |
+
print(f" ✓ Optimizer: AdamW (lr={CONFIG['learning_rate']})")
|
| 374 |
+
print(f" ✓ Total steps: {total_steps:,}")
|
| 375 |
+
print(f" ✓ Warmup steps: {warmup_steps:,}")
|
| 376 |
+
print(f" ✓ Mixed Precision: {CONFIG['use_amp']}")
|
| 377 |
+
print()
|
| 378 |
+
|
| 379 |
+
# ============================================================
|
| 380 |
+
# HELPER FUNCTION: Save checkpoint
|
| 381 |
+
# ============================================================
|
| 382 |
+
|
| 383 |
+
def save_checkpoint(model, tokenizer, optimizer, scheduler, scaler, epoch,
                    val_acc, history, path, is_best=False):
    """Write a training checkpoint to ``path``.

    The checkpoint bundles the model/optimizer/scheduler state dicts, the
    scaler state dict (``None`` when no scaler is given), the epoch number,
    validation accuracy, metric history and the global ``CONFIG``.

    When ``is_best`` is true, the model and tokenizer are additionally
    exported with ``save_pretrained`` into ``CONFIG['output_dir']``.
    """
    scaler_state = None
    if scaler:
        scaler_state = scaler.state_dict()

    state = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        scheduler_state_dict=scheduler.state_dict(),
        scaler_state_dict=scaler_state,
        val_accuracy=val_acc,
        history=history,
        config=CONFIG,
    )
    torch.save(state, path)

    if not is_best:
        return

    # Best-so-far model: also export it in Hugging Face format.
    export_dir = CONFIG['output_dir']
    model.save_pretrained(export_dir)
    tokenizer.save_pretrained(export_dir)
|
| 401 |
+
|
| 402 |
+
# ============================================================
|
| 403 |
+
# TRAINING LOOP (with crash protection)
|
| 404 |
+
# ============================================================
|
| 405 |
+
|
| 406 |
+
print("=" * 70)
|
| 407 |
+
print("TRAINING STARTED")
|
| 408 |
+
print("=" * 70)
|
| 409 |
+
print(f" Epochs: {CONFIG['epochs']}")
|
| 410 |
+
print(f" Batch size: {CONFIG['batch_size']}")
|
| 411 |
+
print(f" Max length: {CONFIG['max_length']}")
|
| 412 |
+
print(f" Device: {device}")
|
| 413 |
+
print(f" AMP: {CONFIG['use_amp']}")
|
| 414 |
+
print(f" Class weights: {CONFIG['use_class_weights']}")
|
| 415 |
+
print("=" * 70)
|
| 416 |
+
print()
|
| 417 |
+
|
| 418 |
+
best_val_acc = 0
|
| 419 |
+
history = {
|
| 420 |
+
'train_loss': [],
|
| 421 |
+
'train_acc': [],
|
| 422 |
+
'val_loss': [],
|
| 423 |
+
'val_acc': []
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
total_train_time = 0
|
| 427 |
+
|
| 428 |
+
# ============================================================
|
| 429 |
+
# WRAP IN TRY/EXCEPT FOR CRASH PROTECTION
|
| 430 |
+
# ============================================================
|
| 431 |
+
|
| 432 |
+
try:
|
| 433 |
+
for epoch in range(CONFIG['epochs']):
|
| 434 |
+
epoch_start_time = time.time()
|
| 435 |
+
|
| 436 |
+
# ==================== TRAINING ====================
|
| 437 |
+
model.train()
|
| 438 |
+
train_loss = 0
|
| 439 |
+
train_correct = 0
|
| 440 |
+
train_total = 0
|
| 441 |
+
|
| 442 |
+
train_pbar = tqdm(
|
| 443 |
+
train_loader,
|
| 444 |
+
desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [TRAIN]",
|
| 445 |
+
unit="batch",
|
| 446 |
+
ncols=120
|
| 447 |
+
)
|
| 448 |
+
|
| 449 |
+
for batch_idx, (input_ids, attention_mask, labels) in enumerate(train_pbar):
|
| 450 |
+
# Move to GPU with non_blocking for speed
|
| 451 |
+
input_ids = input_ids.to(device, non_blocking=True)
|
| 452 |
+
attention_mask = attention_mask.to(device, non_blocking=True)
|
| 453 |
+
labels = labels.to(device, non_blocking=True)
|
| 454 |
+
|
| 455 |
+
optimizer.zero_grad()
|
| 456 |
+
|
| 457 |
+
# Mixed precision forward pass
|
| 458 |
+
if CONFIG['use_amp']:
|
| 459 |
+
with torch.amp.autocast('cuda'):
|
| 460 |
+
outputs = model(
|
| 461 |
+
input_ids=input_ids,
|
| 462 |
+
attention_mask=attention_mask
|
| 463 |
+
)
|
| 464 |
+
# USE CUSTOM LOSS WITH CLASS WEIGHTS
|
| 465 |
+
logits = outputs.logits
|
| 466 |
+
loss = criterion(logits, labels)
|
| 467 |
+
|
| 468 |
+
scaler.scale(loss).backward()
|
| 469 |
+
scaler.unscale_(optimizer)
|
| 470 |
+
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
|
| 471 |
+
scaler.step(optimizer)
|
| 472 |
+
scaler.update()
|
| 473 |
+
else:
|
| 474 |
+
outputs = model(
|
| 475 |
+
input_ids=input_ids,
|
| 476 |
+
attention_mask=attention_mask
|
| 477 |
+
)
|
| 478 |
+
logits = outputs.logits
|
| 479 |
+
loss = criterion(logits, labels)
|
| 480 |
+
loss.backward()
|
| 481 |
+
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
|
| 482 |
+
optimizer.step()
|
| 483 |
+
|
| 484 |
+
scheduler.step()
|
| 485 |
+
|
| 486 |
+
train_loss += loss.item()
|
| 487 |
+
_, predicted = logits.max(1)
|
| 488 |
+
train_total += labels.size(0)
|
| 489 |
+
train_correct += predicted.eq(labels).sum().item()
|
| 490 |
+
|
| 491 |
+
running_loss = train_loss / (batch_idx + 1)
|
| 492 |
+
running_acc = 100 * train_correct / train_total
|
| 493 |
+
current_lr = scheduler.get_last_lr()[0]
|
| 494 |
+
|
| 495 |
+
# Show GPU memory usage
|
| 496 |
+
if torch.cuda.is_available():
|
| 497 |
+
mem_used = torch.cuda.memory_allocated() / 1e9
|
| 498 |
+
mem_total = torch.cuda.get_device_properties(0).total_memory / 1e9
|
| 499 |
+
|
| 500 |
+
train_pbar.set_postfix({
|
| 501 |
+
'loss': f'{running_loss:.4f}',
|
| 502 |
+
'acc': f'{running_acc:.2f}%',
|
| 503 |
+
'lr': f'{current_lr:.2e}',
|
| 504 |
+
'VRAM': f'{mem_used:.1f}/{mem_total:.1f}GB'
|
| 505 |
+
})
|
| 506 |
+
|
| 507 |
+
train_loss = train_loss / len(train_loader)
|
| 508 |
+
train_acc = 100 * train_correct / train_total
|
| 509 |
+
|
| 510 |
+
# ==================== VALIDATION ====================
|
| 511 |
+
model.eval()
|
| 512 |
+
val_loss = 0
|
| 513 |
+
val_correct = 0
|
| 514 |
+
val_total = 0
|
| 515 |
+
|
| 516 |
+
val_pbar = tqdm(
|
| 517 |
+
val_loader,
|
| 518 |
+
desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [VAL] ",
|
| 519 |
+
unit="batch",
|
| 520 |
+
ncols=120
|
| 521 |
+
)
|
| 522 |
+
|
| 523 |
+
with torch.no_grad():
|
| 524 |
+
for batch_idx, (input_ids, attention_mask, labels) in enumerate(val_pbar):
|
| 525 |
+
input_ids = input_ids.to(device, non_blocking=True)
|
| 526 |
+
attention_mask = attention_mask.to(device, non_blocking=True)
|
| 527 |
+
labels = labels.to(device, non_blocking=True)
|
| 528 |
+
|
| 529 |
+
if CONFIG['use_amp']:
|
| 530 |
+
with torch.amp.autocast('cuda'):
|
| 531 |
+
outputs = model(
|
| 532 |
+
input_ids=input_ids,
|
| 533 |
+
attention_mask=attention_mask
|
| 534 |
+
)
|
| 535 |
+
logits = outputs.logits
|
| 536 |
+
loss = criterion(logits, labels)
|
| 537 |
+
else:
|
| 538 |
+
outputs = model(
|
| 539 |
+
input_ids=input_ids,
|
| 540 |
+
attention_mask=attention_mask
|
| 541 |
+
)
|
| 542 |
+
logits = outputs.logits
|
| 543 |
+
loss = criterion(logits, labels)
|
| 544 |
+
|
| 545 |
+
val_loss += loss.item()
|
| 546 |
+
_, predicted = logits.max(1)
|
| 547 |
+
val_total += labels.size(0)
|
| 548 |
+
val_correct += predicted.eq(labels).sum().item()
|
| 549 |
+
|
| 550 |
+
running_loss = val_loss / (batch_idx + 1)
|
| 551 |
+
running_acc = 100 * val_correct / val_total
|
| 552 |
+
|
| 553 |
+
val_pbar.set_postfix({
|
| 554 |
+
'loss': f'{running_loss:.4f}',
|
| 555 |
+
'acc': f'{running_acc:.2f}%'
|
| 556 |
+
})
|
| 557 |
+
|
| 558 |
+
val_loss = val_loss / len(val_loader)
|
| 559 |
+
val_acc = 100 * val_correct / val_total
|
| 560 |
+
|
| 561 |
+
history['train_loss'].append(train_loss)
|
| 562 |
+
history['train_acc'].append(train_acc)
|
| 563 |
+
history['val_loss'].append(val_loss)
|
| 564 |
+
history['val_acc'].append(val_acc)
|
| 565 |
+
|
| 566 |
+
epoch_time = time.time() - epoch_start_time
|
| 567 |
+
total_train_time += epoch_time
|
| 568 |
+
|
| 569 |
+
# ==================== EPOCH SUMMARY ====================
|
| 570 |
+
print()
|
| 571 |
+
print("─" * 70)
|
| 572 |
+
print(f"EPOCH {epoch+1}/{CONFIG['epochs']} SUMMARY")
|
| 573 |
+
print("─" * 70)
|
| 574 |
+
print(f" {'Metric':<20} {'Train':>15} {'Validation':>15}")
|
| 575 |
+
print(f" {'-'*20} {'-'*15} {'-'*15}")
|
| 576 |
+
print(f" {'Loss':<20} {train_loss:>15.4f} {val_loss:>15.4f}")
|
| 577 |
+
print(f" {'Accuracy':<20} {train_acc:>14.2f}% {val_acc:>14.2f}%")
|
| 578 |
+
print(f" {'-'*20} {'-'*15} {'-'*15}")
|
| 579 |
+
print(f" {'Time':<20} {epoch_time:>14.1f}s")
|
| 580 |
+
print(f" {'Samples/sec':<20} {len(train_dataset)/epoch_time:>14.1f}")
|
| 581 |
+
|
| 582 |
+
# ==================== SAVE CHECKPOINT ====================
|
| 583 |
+
is_best = val_acc > best_val_acc
|
| 584 |
+
|
| 585 |
+
if is_best:
|
| 586 |
+
best_val_acc = val_acc
|
| 587 |
+
|
| 588 |
+
# Always save periodic checkpoint
|
| 589 |
+
if CONFIG['checkpoint_every_epoch']:
|
| 590 |
+
checkpoint_path = os.path.join(
|
| 591 |
+
CONFIG['checkpoint_dir'],
|
| 592 |
+
f'checkpoint_epoch_{epoch+1}.pt'
|
| 593 |
+
)
|
| 594 |
+
save_checkpoint(
|
| 595 |
+
model, tokenizer, optimizer, scheduler, scaler,
|
| 596 |
+
epoch + 1, val_acc, history, checkpoint_path, is_best=is_best
|
| 597 |
+
)
|
| 598 |
+
print(f"\n 💾 Checkpoint saved: {checkpoint_path}")
|
| 599 |
+
|
| 600 |
+
if is_best:
|
| 601 |
+
# Also save as best model
|
| 602 |
+
torch.save({
|
| 603 |
+
'epoch': epoch + 1,
|
| 604 |
+
'best_val_accuracy': best_val_acc,
|
| 605 |
+
'config': CONFIG,
|
| 606 |
+
'history': history
|
| 607 |
+
}, os.path.join(CONFIG['output_dir'], 'training_info.pt'))
|
| 608 |
+
|
| 609 |
+
print(f" 🏆 NEW BEST MODEL SAVED! Val Accuracy: {best_val_acc:.2f}%")
|
| 610 |
+
else:
|
| 611 |
+
print(f"\n ℹ️ Best Val Accuracy so far: {best_val_acc:.2f}%")
|
| 612 |
+
|
| 613 |
+
print("─" * 70)
|
| 614 |
+
print()
|
| 615 |
+
|
| 616 |
+
except Exception as e:
|
| 617 |
+
# ============================================================
|
| 618 |
+
# EMERGENCY SAVE ON CRASH
|
| 619 |
+
# ============================================================
|
| 620 |
+
print()
|
| 621 |
+
print("!" * 70)
|
| 622 |
+
print("⚠️ ERROR OCCURRED - SAVING EMERGENCY CHECKPOINT")
|
| 623 |
+
print("!" * 70)
|
| 624 |
+
print(f" Error: {e}")
|
| 625 |
+
|
| 626 |
+
emergency_dir = CONFIG['output_dir'] + '_emergency'
|
| 627 |
+
os.makedirs(emergency_dir, exist_ok=True)
|
| 628 |
+
|
| 629 |
+
try:
|
| 630 |
+
model.save_pretrained(emergency_dir)
|
| 631 |
+
tokenizer.save_pretrained(emergency_dir)
|
| 632 |
+
|
| 633 |
+
torch.save({
|
| 634 |
+
'epoch': epoch + 1 if 'epoch' in dir() else 0,
|
| 635 |
+
'history': history,
|
| 636 |
+
'config': CONFIG,
|
| 637 |
+
'error': str(e)
|
| 638 |
+
}, os.path.join(emergency_dir, 'emergency_checkpoint.pt'))
|
| 639 |
+
|
| 640 |
+
print(f" ✓ Emergency checkpoint saved to: {emergency_dir}")
|
| 641 |
+
except Exception as save_error:
|
| 642 |
+
print(f" ✗ Failed to save emergency checkpoint: {save_error}")
|
| 643 |
+
|
| 644 |
+
print("!" * 70)
|
| 645 |
+
raise # Re-raise the exception
|
| 646 |
+
|
| 647 |
+
print("=" * 70)
|
| 648 |
+
print("TRAINING COMPLETE")
|
| 649 |
+
print("=" * 70)
|
| 650 |
+
print(f" Total training time: {total_train_time/60:.1f} minutes")
|
| 651 |
+
print(f" Best Val Accuracy: {best_val_acc:.2f}%")
|
| 652 |
+
print("=" * 70)
|
| 653 |
+
print()
|
| 654 |
+
|
| 655 |
+
# ============================================================
|
| 656 |
+
# FINAL TEST EVALUATION
|
| 657 |
+
# ============================================================
|
| 658 |
+
|
| 659 |
+
print("=" * 70)
|
| 660 |
+
print("FINAL TEST EVALUATION")
|
| 661 |
+
print("=" * 70)
|
| 662 |
+
print("Loading best model...")
|
| 663 |
+
|
| 664 |
+
model = AutoModelForSequenceClassification.from_pretrained(
|
| 665 |
+
CONFIG['output_dir'],
|
| 666 |
+
local_files_only=True
|
| 667 |
+
)
|
| 668 |
+
model = model.to(device)
|
| 669 |
+
model.eval()
|
| 670 |
+
|
| 671 |
+
# Use standard loss for test evaluation (no class weights)
|
| 672 |
+
test_criterion = nn.CrossEntropyLoss()
|
| 673 |
+
|
| 674 |
+
test_loss = 0
|
| 675 |
+
test_correct = 0
|
| 676 |
+
test_total = 0
|
| 677 |
+
all_preds = []
|
| 678 |
+
all_labels = []
|
| 679 |
+
|
| 680 |
+
test_pbar = tqdm(test_loader, desc="Testing", unit="batch", ncols=120)
|
| 681 |
+
|
| 682 |
+
with torch.no_grad():
|
| 683 |
+
for batch_idx, (input_ids, attention_mask, labels) in enumerate(test_pbar):
|
| 684 |
+
input_ids = input_ids.to(device, non_blocking=True)
|
| 685 |
+
attention_mask = attention_mask.to(device, non_blocking=True)
|
| 686 |
+
labels = labels.to(device, non_blocking=True)
|
| 687 |
+
|
| 688 |
+
if CONFIG['use_amp']:
|
| 689 |
+
with torch.amp.autocast('cuda'):
|
| 690 |
+
outputs = model(
|
| 691 |
+
input_ids=input_ids,
|
| 692 |
+
attention_mask=attention_mask
|
| 693 |
+
)
|
| 694 |
+
logits = outputs.logits
|
| 695 |
+
loss = test_criterion(logits, labels)
|
| 696 |
+
else:
|
| 697 |
+
outputs = model(
|
| 698 |
+
input_ids=input_ids,
|
| 699 |
+
attention_mask=attention_mask
|
| 700 |
+
)
|
| 701 |
+
logits = outputs.logits
|
| 702 |
+
loss = test_criterion(logits, labels)
|
| 703 |
+
|
| 704 |
+
test_loss += loss.item()
|
| 705 |
+
_, predicted = logits.max(1)
|
| 706 |
+
test_total += labels.size(0)
|
| 707 |
+
test_correct += predicted.eq(labels).sum().item()
|
| 708 |
+
|
| 709 |
+
all_preds.extend(predicted.cpu().numpy())
|
| 710 |
+
all_labels.extend(labels.cpu().numpy())
|
| 711 |
+
|
| 712 |
+
test_pbar.set_postfix({
|
| 713 |
+
'loss': f'{test_loss/(batch_idx+1):.4f}',
|
| 714 |
+
'acc': f'{100*test_correct/test_total:.2f}%'
|
| 715 |
+
})
|
| 716 |
+
|
| 717 |
+
test_loss = test_loss / len(test_loader)
|
| 718 |
+
test_acc = 100 * test_correct / test_total
|
| 719 |
+
all_preds = np.array(all_preds)
|
| 720 |
+
all_labels = np.array(all_labels)
|
| 721 |
+
|
| 722 |
+
within_one = np.mean(np.abs(all_preds - all_labels) <= 1) * 100
|
| 723 |
+
|
| 724 |
+
print()
|
| 725 |
+
print("─" * 70)
|
| 726 |
+
print("TEST RESULTS")
|
| 727 |
+
print("─" * 70)
|
| 728 |
+
print(f" Test Loss: {test_loss:.4f}")
|
| 729 |
+
print(f" Test Accuracy: {test_acc:.2f}%")
|
| 730 |
+
print(f" Within ±1 Star: {within_one:.2f}%")
|
| 731 |
+
print("─" * 70)
|
| 732 |
+
print()
|
| 733 |
+
|
| 734 |
+
print("CLASSIFICATION REPORT")
|
| 735 |
+
print("─" * 70)
|
| 736 |
+
report = classification_report(
|
| 737 |
+
all_labels,
|
| 738 |
+
all_preds,
|
| 739 |
+
target_names=['1 Star', '2 Star', '3 Star', '4 Star', '5 Star'],
|
| 740 |
+
digits=3,
|
| 741 |
+
output_dict=True
|
| 742 |
+
)
|
| 743 |
+
print(classification_report(
|
| 744 |
+
all_labels,
|
| 745 |
+
all_preds,
|
| 746 |
+
target_names=['1 Star', '2 Star', '3 Star', '4 Star', '5 Star'],
|
| 747 |
+
digits=3
|
| 748 |
+
))
|
| 749 |
+
|
| 750 |
+
# ============================================================
|
| 751 |
+
# PLOTS
|
| 752 |
+
# ============================================================
|
| 753 |
+
|
| 754 |
+
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
|
| 755 |
+
epochs_range = range(1, len(history['train_loss']) + 1)
|
| 756 |
+
|
| 757 |
+
axes[0].plot(epochs_range, history['train_loss'], 'b-o', label='Train', linewidth=2)
|
| 758 |
+
axes[0].plot(epochs_range, history['val_loss'], 'r-o', label='Val', linewidth=2)
|
| 759 |
+
axes[0].set_xlabel('Epoch')
|
| 760 |
+
axes[0].set_ylabel('Loss')
|
| 761 |
+
axes[0].set_title('Loss (with Class Weights)' if CONFIG['use_class_weights'] else 'Loss')
|
| 762 |
+
axes[0].legend()
|
| 763 |
+
axes[0].grid(True, alpha=0.3)
|
| 764 |
+
|
| 765 |
+
axes[1].plot(epochs_range, history['train_acc'], 'b-o', label='Train', linewidth=2)
|
| 766 |
+
axes[1].plot(epochs_range, history['val_acc'], 'r-o', label='Val', linewidth=2)
|
| 767 |
+
axes[1].set_xlabel('Epoch')
|
| 768 |
+
axes[1].set_ylabel('Accuracy (%)')
|
| 769 |
+
axes[1].set_title('Accuracy')
|
| 770 |
+
axes[1].legend()
|
| 771 |
+
axes[1].grid(True, alpha=0.3)
|
| 772 |
+
|
| 773 |
+
plt.tight_layout()
|
| 774 |
+
plt.savefig('plots/training_history.png', dpi=150)
|
| 775 |
+
print("✓ Saved: plots/training_history.png")
|
| 776 |
+
|
| 777 |
+
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
|
| 778 |
+
cm = confusion_matrix(all_labels, all_preds)
|
| 779 |
+
labels_names = ['1 Star', '2 Star', '3 Star', '4 Star', '5 Star']
|
| 780 |
+
|
| 781 |
+
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
|
| 782 |
+
xticklabels=labels_names, yticklabels=labels_names, ax=axes[0])
|
| 783 |
+
axes[0].set_xlabel('Predicted')
|
| 784 |
+
axes[0].set_ylabel('Actual')
|
| 785 |
+
axes[0].set_title('Confusion Matrix (Counts)')
|
| 786 |
+
|
| 787 |
+
cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
|
| 788 |
+
sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
|
| 789 |
+
xticklabels=labels_names, yticklabels=labels_names, ax=axes[1])
|
| 790 |
+
axes[1].set_xlabel('Predicted')
|
| 791 |
+
axes[1].set_ylabel('Actual')
|
| 792 |
+
axes[1].set_title('Confusion Matrix (Normalized)')
|
| 793 |
+
|
| 794 |
+
plt.tight_layout()
|
| 795 |
+
plt.savefig('plots/confusion_matrix.png', dpi=150)
|
| 796 |
+
print("✓ Saved: plots/confusion_matrix.png")
|
| 797 |
+
|
| 798 |
+
# ============================================================
|
| 799 |
+
# PER-CLASS RECALL COMPARISON PLOT (NEW!)
|
| 800 |
+
# ============================================================
|
| 801 |
+
|
| 802 |
+
fig, ax = plt.subplots(figsize=(10, 6))
|
| 803 |
+
classes = ['1 Star', '2 Star', '3 Star', '4 Star', '5 Star']
|
| 804 |
+
recalls = [report[c]['recall'] * 100 for c in classes]
|
| 805 |
+
|
| 806 |
+
bars = ax.bar(classes, recalls, color=['#ff6b6b', '#ffa94d', '#ffd43b', '#69db7c', '#4dabf7'])
|
| 807 |
+
ax.axhline(y=50, color='red', linestyle='--', alpha=0.5, label='50% threshold')
|
| 808 |
+
ax.axhline(y=75, color='green', linestyle='--', alpha=0.5, label='75% threshold')
|
| 809 |
+
|
| 810 |
+
for bar, recall in zip(bars, recalls):
|
| 811 |
+
ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
|
| 812 |
+
f'{recall:.1f}%', ha='center', va='bottom', fontsize=11)
|
| 813 |
+
|
| 814 |
+
ax.set_ylabel('Recall (%)')
|
| 815 |
+
ax.set_title('Per-Class Recall (Higher = Better at detecting this class)')
|
| 816 |
+
ax.set_ylim(0, 105)
|
| 817 |
+
ax.legend()
|
| 818 |
+
ax.grid(True, alpha=0.3, axis='y')
|
| 819 |
+
|
| 820 |
+
plt.tight_layout()
|
| 821 |
+
plt.savefig('plots/per_class_recall.png', dpi=150)
|
| 822 |
+
print("✓ Saved: plots/per_class_recall.png")
|
| 823 |
+
|
| 824 |
+
# ============================================================
|
| 825 |
+
# SAVE RESULTS
|
| 826 |
+
# ============================================================
|
| 827 |
+
|
| 828 |
+
results = {
|
| 829 |
+
'best_val_accuracy': best_val_acc,
|
| 830 |
+
'test_accuracy': test_acc,
|
| 831 |
+
'test_within_one': within_one,
|
| 832 |
+
'history': history,
|
| 833 |
+
'config': CONFIG,
|
| 834 |
+
'train_time_minutes': total_train_time / 60,
|
| 835 |
+
'classification_report': report,
|
| 836 |
+
'confusion_matrix': cm.tolist()
|
| 837 |
+
}
|
| 838 |
+
torch.save(results, os.path.join(CONFIG['output_dir'], 'results.pt'))
|
| 839 |
+
|
| 840 |
+
print()
|
| 841 |
+
print("=" * 70)
|
| 842 |
+
print("🎉 ALL DONE!")
|
| 843 |
+
print("=" * 70)
|
| 844 |
+
print(f" Best Val Accuracy: {best_val_acc:.2f}%")
|
| 845 |
+
print(f" Test Accuracy: {test_acc:.2f}%")
|
| 846 |
+
print(f" Within ±1 Star: {within_one:.2f}%")
|
| 847 |
+
print(f" Training Time: {total_train_time/60:.1f} minutes")
|
| 848 |
+
print()
|
| 849 |
+
print(" Per-Class Recall:")
|
| 850 |
+
for c in classes:
|
| 851 |
+
recall = report[c]['recall'] * 100
|
| 852 |
+
indicator = "✓" if recall >= 60 else "⚠️" if recall >= 40 else "✗"
|
| 853 |
+
print(f" {indicator} {c}: {recall:.1f}%")
|
| 854 |
+
print("=" * 70)
|
| 855 |
+
|
| 856 |
+
|
| 857 |
+
# ============================================================
|
| 858 |
+
# ENTRY POINT
|
| 859 |
+
# ============================================================
|
| 860 |
+
|
| 861 |
+
if __name__ == '__main__':
|
| 862 |
+
main()
|
course_feedback_nlp/train_3_classes.py
ADDED
|
@@ -0,0 +1,872 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Student Feedback Sentiment Model - Training Script
|
| 3 |
+
==================================================
|
| 4 |
+
Optimized for Teacher/Agent use case:
|
| 5 |
+
- 3 classes: Negative, Neutral, Positive
|
| 6 |
+
- High recall on negative feedback (don't miss struggling students)
|
| 7 |
+
- Confidence scores for uncertainty
|
| 8 |
+
- Fast inference for agent integration
|
| 9 |
+
|
| 10 |
+
FIXED: save_checkpoint now properly preserves model config.json
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import torch
|
| 14 |
+
import torch.nn as nn
|
| 15 |
+
import torch.nn.functional as F
|
| 16 |
+
from torch.utils.data import Dataset, DataLoader, TensorDataset
|
| 17 |
+
import pandas as pd
|
| 18 |
+
import numpy as np
|
| 19 |
+
from sklearn.model_selection import train_test_split
|
| 20 |
+
from sklearn.metrics import classification_report, confusion_matrix
|
| 21 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
|
| 22 |
+
from transformers import get_linear_schedule_with_warmup
|
| 23 |
+
from tqdm.auto import tqdm
|
| 24 |
+
import matplotlib.pyplot as plt
|
| 25 |
+
import seaborn as sns
|
| 26 |
+
import os
|
| 27 |
+
import time
|
| 28 |
+
import gc
|
| 29 |
+
import json
|
| 30 |
+
import warnings
|
| 31 |
+
warnings.filterwarnings('ignore')
|
| 32 |
+
|
| 33 |
+
# ============================================================
|
| 34 |
+
# AMD CRASH PROTECTION
|
| 35 |
+
# ============================================================
|
| 36 |
+
# Pin the AMD/ROCm log-level variables and HIP_VISIBLE_DEVICES to '0'
# for this process (set in one call instead of three assignments).
os.environ.update({
    'AMD_LOG_LEVEL': '0',
    'ROCM_LOG_LEVEL': '0',
    'HIP_VISIBLE_DEVICES': '0',
})
|
| 39 |
+
|
| 40 |
+
# ============================================================
|
| 41 |
+
# CONFIGURATION - OPTIMIZED FOR TEACHER USE CASE
|
| 42 |
+
# ============================================================
|
| 43 |
+
|
| 44 |
+
# Single source of truth for every tunable of the 3-class training run.
CONFIG = {
    # ---- paths ----
    'data_path': 'Coursera_reviews.csv',
    'model_name': './distilbert-base-uncased',
    'output_dir': 'teacher_sentiment_model',
    'checkpoint_dir': 'checkpoints_teacher',

    # ---- label space: zero-based star rating -> sentiment class ----
    'num_classes': 3,
    'class_names': ['Negative', 'Neutral', 'Positive'],
    'class_mapping': {
        0: 0,  # 1 star  -> Negative
        1: 0,  # 2 stars -> Negative
        2: 1,  # 3 stars -> Neutral
        3: 2,  # 4 stars -> Positive
        4: 2,  # 5 stars -> Positive
    },

    # ---- tokenization ----
    'max_length': 96,

    # ---- optimisation ----
    'batch_size': 128,
    'gradient_accumulation_steps': 2,
    'epochs': 7,
    'learning_rate': 2e-5,
    'weight_decay': 0.01,
    'warmup_ratio': 0.06,
    'max_grad_norm': 1.0,

    # ---- learning-rate schedule ----
    'scheduler_type': 'cosine',
    'cosine_min_lr_ratio': 0.01,

    # ---- loss ----
    'loss_type': 'focal',        # focal loss concentrates on hard examples
    'focal_gamma': 2.0,
    'label_smoothing': 0.05,     # light smoothing for calibration

    # ---- class imbalance ----
    # The negative class is weighted up on purpose: missing a struggling
    # student is the costly mistake for the teacher use case.
    'use_class_weights': True,
    'class_weight_power': 0.7,
    'negative_class_boost': 1.5,

    # ---- early stopping ----
    'early_stopping': True,
    'early_stopping_patience': 3,
    'early_stopping_metric': 'val_loss',

    # ---- hardware / reproducibility ----
    'seed': 42,
    'num_workers': 4,
    'pin_memory': True,
    'use_amp': True,

    # ---- checkpointing ----
    'checkpoint_every_epoch': True,
    'save_total_limit': 3,

    # ---- train / validation / test fractions ----
    'train_size': 0.8,
    'val_size': 0.1,
    'test_size': 0.1,
}
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# ============================================================
|
| 113 |
+
# CUSTOM LOSS FUNCTIONS
|
| 114 |
+
# ============================================================
|
| 115 |
+
|
| 116 |
+
class FocalLoss(nn.Module):
    """
    Focal loss with optional label smoothing and per-class weights.

    Each sample's cross entropy is scaled by ``(1 - p_t) ** gamma``, where
    ``p_t`` is the softmax probability assigned to the true class, so
    confidently-correct samples contribute little and hard samples dominate.

    Args:
        num_classes: Number of target classes; used to spread the
            label-smoothing mass over the non-target classes.
        gamma: Focusing exponent; ``0`` disables the focal scaling.
        alpha: Optional per-class weights indexed by class id. May be a
            tensor or any sequence ``torch.as_tensor`` accepts.
        label_smoothing: Probability mass moved from the true class to the
            other classes; ``0`` disables smoothing.
    """
    def __init__(self, num_classes=3, gamma=2.0, alpha=None, label_smoothing=0.0):
        super().__init__()
        self.num_classes = num_classes
        self.gamma = gamma
        self.label_smoothing = label_smoothing

        if alpha is not None:
            # register_buffer only accepts tensors; convert lists/arrays
            # while leaving an already-built tensor (and its dtype) alone.
            if not torch.is_tensor(alpha):
                alpha = torch.as_tensor(alpha, dtype=torch.float32)
            self.register_buffer('alpha', alpha)
        else:
            self.alpha = None

    def forward(self, logits, targets):
        """
        Return the mean focal loss of ``logits`` against ``targets``.

        ``logits`` is indexed as (sample, class) and ``targets`` holds one
        class id per sample, as required by the ``gather`` along dim 1.
        """
        # A single log-softmax pass feeds both p_t and the cross entropy.
        log_probs = F.log_softmax(logits, dim=-1)
        target_index = targets.unsqueeze(1)
        log_pt = log_probs.gather(1, target_index).squeeze(1)
        pt = log_pt.exp()

        # Down-weight samples the model already classifies confidently.
        focal_weight = (1 - pt) ** self.gamma

        if self.label_smoothing > 0:
            confidence = 1.0 - self.label_smoothing
            smooth_value = self.label_smoothing / (self.num_classes - 1)
            # Smoothed target distribution: `confidence` on the true class,
            # `smooth_value` on every other class.
            smooth_targets = torch.full_like(log_probs, smooth_value)
            smooth_targets.scatter_(1, target_index, confidence)
            ce = -(smooth_targets * log_probs).sum(dim=-1)
        else:
            ce = -log_pt

        loss = focal_weight * ce

        if self.alpha is not None:
            # Follow the loss tensor's device so indexing never mixes devices,
            # even if the criterion itself was not moved with .to(device).
            alpha_t = self.alpha.to(loss.device)[targets]
            loss = alpha_t * loss

        return loss.mean()
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
# ============================================================
|
| 160 |
+
# UTILITY FUNCTIONS
|
| 161 |
+
# ============================================================
|
| 162 |
+
|
| 163 |
+
def set_seed(seed):
    """
    Seed every random number generator the training run may touch.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), and requests deterministic cuDNN kernels.

    Args:
        seed: Integer seed shared by all generators.
    """
    import random  # local import: the module header does not import it

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def get_scheduler(optimizer, scheduler_type, total_steps, warmup_steps, config):
    """
    Build the learning-rate scheduler for the run.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        scheduler_type: ``'cosine'`` selects linear warmup followed by cosine
            decay with a floor; any other value selects
            ``get_linear_schedule_with_warmup``.
        total_steps: Total number of scheduler steps planned for training.
        warmup_steps: Number of initial steps spent ramping up linearly.
        config: Mapping read for ``'cosine_min_lr_ratio'`` (default 0.01),
            the smallest multiplier the cosine schedule may return.

    Returns:
        A scheduler object wrapping ``optimizer``.
    """
    if scheduler_type != 'cosine':
        return get_linear_schedule_with_warmup(optimizer, warmup_steps, total_steps)

    min_lr_ratio = config.get('cosine_min_lr_ratio', 0.01)
    decay_steps = max(1, total_steps - warmup_steps)

    def lr_lambda(current_step):
        if current_step < warmup_steps:
            return float(current_step) / float(max(1, warmup_steps))
        # Clamp at 1.0: without it, stepping past total_steps pushes the
        # cosine into its rising half and the learning rate climbs again.
        progress = min(1.0, float(current_step - warmup_steps) / float(decay_steps))
        cosine = float(0.5 * (1.0 + np.cos(np.pi * progress)))
        return max(min_lr_ratio, cosine)

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def save_checkpoint(model, tokenizer, optimizer, scheduler, scaler, epoch,
                    val_acc, val_loss, history, config, path, is_best=False):
    """
    Save a training checkpoint and, for the best epoch, export the model.

    The checkpoint written to ``path`` bundles the model, optimizer,
    scheduler and (optional) AMP scaler state together with the epoch,
    validation metrics, history and config.

    When ``is_best`` is true the model and tokenizer are additionally
    exported to ``config['output_dir']`` with ``save_pretrained``. The
    training settings go to a separate ``training_config.json`` so that the
    model's own ``config.json`` (which carries ``model_type``) is never
    overwritten; that ``config.json`` is then reloaded and re-saved with the
    label mappings derived from ``config['class_names']``.

    Args:
        model: Model exposing ``state_dict()`` and ``save_pretrained()``.
        tokenizer: Tokenizer exposing ``save_pretrained()``; only used when
            ``is_best`` is true.
        optimizer: Optimizer whose state is stored.
        scheduler: LR scheduler whose state is stored.
        scaler: AMP gradient scaler, or ``None`` when AMP is disabled.
        epoch: Epoch number recorded in the checkpoint.
        val_acc: Validation accuracy recorded in the checkpoint.
        val_loss: Validation loss recorded in the checkpoint.
        history: Training history object stored as-is.
        config: Training configuration mapping stored as-is.
        path: Destination file of the checkpoint.
        is_best: Whether this epoch is the best so far.
    """
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'scaler_state_dict': scaler.state_dict() if scaler is not None else None,
        'val_accuracy': val_acc,
        'val_loss': val_loss,
        'history': history,
        'config': config,
    }

    # Make sure the target directory exists so a missing checkpoint folder
    # cannot cost a whole epoch of training.
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    torch.save(checkpoint, path)

    if not is_best:
        return

    output_dir = config['output_dir']

    # save_pretrained writes the model's own config.json (with model_type).
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    # Custom training settings live in their own file; config.json must not
    # be overwritten with them.
    training_config = {
        'num_classes': config['num_classes'],
        'class_names': config['class_names'],
        # JSON object keys must be strings
        'class_mapping': {str(k): v for k, v in config['class_mapping'].items()},
        'max_length': config['max_length'],
    }
    training_config_path = os.path.join(output_dir, 'training_config.json')
    with open(training_config_path, 'w', encoding='utf-8') as f:
        json.dump(training_config, f, indent=2)

    # Add the label mappings to the model's config.json through the config
    # object rather than by editing the file by hand.
    model_config = AutoConfig.from_pretrained(output_dir)
    model_config.num_labels = config['num_classes']
    model_config.id2label = dict(enumerate(config['class_names']))
    model_config.label2id = {name: i for i, name in enumerate(config['class_names'])}
    model_config.save_pretrained(output_dir)
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def cleanup_old_checkpoints(checkpoint_dir, save_total_limit):
    """
    Delete the oldest per-epoch checkpoints, keeping the newest ones.

    Only files named ``checkpoint_epoch_<number>.pt`` are considered. They
    are ordered by their numeric epoch, because plain string sorting would
    place ``checkpoint_epoch_10.pt`` before ``checkpoint_epoch_2.pt`` and
    delete the newest checkpoints first.

    Args:
        checkpoint_dir: Directory holding the checkpoint files.
        save_total_limit: Number of checkpoints to keep; ``None`` or a
            non-positive value disables the cleanup.
    """
    if save_total_limit is None or save_total_limit <= 0:
        return
    if not os.path.isdir(checkpoint_dir):
        # Nothing has been written yet, so there is nothing to prune.
        return

    prefix, suffix = 'checkpoint_epoch_', '.pt'
    numbered = []
    for name in os.listdir(checkpoint_dir):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        epoch_text = name[len(prefix):-len(suffix)]
        # Files whose epoch part is not a plain number cannot be ordered,
        # so they are never deleted.
        if epoch_text.isascii() and epoch_text.isdigit():
            numbered.append((int(epoch_text), name))

    numbered.sort()
    for _, name in numbered[:-save_total_limit]:
        os.remove(os.path.join(checkpoint_dir, name))
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
def tokenize_batch(texts, tokenizer, max_length, desc="Tokenizing", batch_size=10000):
    """
    Tokenize ``texts`` in chunks and return the stacked tensors.

    Every text is truncated and padded to ``max_length`` so that the
    per-chunk results can be concatenated along the first dimension.

    Args:
        texts: Sliceable collection of strings with a length (for example a
            pandas Series, a NumPy array or a list).
        tokenizer: Callable invoked as ``tokenizer(list_of_texts,
            truncation=True, padding='max_length', max_length=...,
            return_tensors='pt')`` and returning a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Fixed sequence length of every encoded text.
        desc: Label shown on the progress bar.
        batch_size: Number of texts passed to the tokenizer per call.

    Returns:
        Tuple ``(input_ids, attention_mask)`` of tensors concatenated over
        all chunks.
    """
    if len(texts) == 0:
        # torch.cat refuses an empty list; return correctly-shaped empties.
        # dtype is assumed to match the tokenizer's integer output.
        empty = torch.empty((0, max_length), dtype=torch.long)
        return empty, empty.clone()

    all_input_ids = []
    all_attention_masks = []

    for start in tqdm(range(0, len(texts), batch_size), desc=desc):
        chunk = texts[start:start + batch_size]
        # Series/ndarray slices expose tolist(); plain lists do not.
        batch_texts = chunk.tolist() if hasattr(chunk, 'tolist') else list(chunk)
        encodings = tokenizer(
            batch_texts,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt',
        )
        all_input_ids.append(encodings['input_ids'])
        all_attention_masks.append(encodings['attention_mask'])

    return torch.cat(all_input_ids, dim=0), torch.cat(all_attention_masks, dim=0)
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
# ============================================================
|
| 264 |
+
# MAIN FUNCTION
|
| 265 |
+
# ============================================================
|
| 266 |
+
|
| 267 |
+
def main():
    """Train, evaluate and report on the 3-class feedback sentiment model.

    All settings are read from the module-level ``CONFIG``. The steps are:

    1. Load the review CSV, drop unusable rows and map 1-5 star ratings to
       the 3 target classes through ``CONFIG['class_mapping']``.
    2. Optionally derive class weights (inverse frequency, power scaling and
       an extra boost for class 0).
    3. Make a stratified train / val / test split, tokenize, build loaders.
    4. Train with focal loss, gradient accumulation, optional CUDA mixed
       precision, per-epoch checkpointing and early stopping on val loss.
    5. Reload the model from ``CONFIG['output_dir']``, evaluate on the test
       split, write plots to ``plots/`` and results to the output directory.

    Raises:
        Exception: Any error raised inside the training loop is re-raised
            after the current model and tokenizer have been written to
            ``CONFIG['output_dir'] + '_emergency'``.
    """
    set_seed(CONFIG['seed'])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The AMP code paths below are hard-wired to 'cuda', so only enable them
    # when the run is actually on a CUDA device.
    use_amp = bool(CONFIG['use_amp']) and device.type == 'cuda'
    # DataLoader raises ValueError for persistent_workers=True with 0 workers.
    persistent_workers = CONFIG['num_workers'] > 0
    # Label 0 is the "negative" class everywhere below (weight boost,
    # false-negative count), so index the report by its configured name.
    negative_name = CONFIG['class_names'][0]
    class_ids = list(range(len(CONFIG['class_names'])))

    print("=" * 70)
    print("STUDENT FEEDBACK SENTIMENT MODEL")
    print("Optimized for Teacher/Agent Use Case")
    print("=" * 70)
    print()
    print("TARGET CLASSES:")
    print(" 🔴 Negative (1-2 stars) → 'Needs Attention'")
    print(" 🟡 Neutral (3 stars) → 'Mixed/Unclear'")
    print(" 🟢 Positive (4-5 stars) → 'Satisfied'")
    print()
    print(f"Device: {device}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    print("=" * 70)
    print()

    # Create directories
    os.makedirs(CONFIG['output_dir'], exist_ok=True)
    os.makedirs(CONFIG['checkpoint_dir'], exist_ok=True)
    os.makedirs('plots', exist_ok=True)

    # ============================================================
    # DATA LOADING & PREPROCESSING
    # ============================================================

    print("LOADING DATA")
    print("-" * 70)

    df = pd.read_csv(CONFIG['data_path'])
    print(f"Raw data: {len(df):,} samples")

    # Clean: require text and rating, drop blank reviews, keep ratings 1..5
    df = df.dropna(subset=['reviews', 'rating'])
    df = df[df['reviews'].str.strip() != '']
    df['rating'] = df['rating'].astype(int)
    df = df[df['rating'].between(1, 5)]

    # Original 5-class labels (0..4)
    df['label_5class'] = df['rating'] - 1

    # Map to 3 classes
    df['label'] = df['label_5class'].map(CONFIG['class_mapping'])

    print(f"Cleaned data: {len(df):,} samples")
    print()

    # Show original distribution
    print("Original 5-class distribution:")
    for rating in range(1, 6):
        count = (df['rating'] == rating).sum()
        pct = 100 * count / len(df)
        print(f" {rating} Star: {count:>8,} ({pct:>5.1f}%)")
    print()

    # Show new 3-class distribution
    print("New 3-class distribution:")
    class_counts_3 = []
    for label, name in enumerate(CONFIG['class_names']):
        count = (df['label'] == label).sum()
        pct = 100 * count / len(df)
        class_counts_3.append(count)
        emoji = ['🔴', '🟡', '🟢'][label]
        print(f" {emoji} {name}: {count:>8,} ({pct:>5.1f}%)")
    print()

    # ============================================================
    # CALCULATE CLASS WEIGHTS
    # ============================================================

    if CONFIG['use_class_weights']:
        print("Calculating class weights...")
        class_counts = np.array(class_counts_3)

        # Inverse frequency, normalised so the weights sum to the class count
        weights = 1.0 / class_counts
        weights = weights / weights.sum() * len(weights)

        # Apply power scaling
        power = CONFIG['class_weight_power']
        weights = weights ** power
        weights = weights / weights.sum() * len(weights)

        # Extra boost for negative class (teacher priority!)
        negative_boost = CONFIG.get('negative_class_boost', 1.0)
        weights[0] = weights[0] * negative_boost

        # Re-normalize
        weights = weights / weights.sum() * len(weights)

        class_weights = torch.tensor(weights, dtype=torch.float32)

        print("Class weights (higher = more important):")
        for i, (name, w) in enumerate(zip(CONFIG['class_names'], class_weights)):
            bar = "█" * int(w * 15)
            boost_note = " ← BOOSTED (teacher priority)" if i == 0 else ""
            print(f" {name}: {w:.4f} {bar}{boost_note}")
        print()
    else:
        class_weights = None

    # ============================================================
    # TRAIN / VAL / TEST SPLIT
    # ============================================================

    print("SPLITTING DATA")
    print("-" * 70)

    X_temp, X_test, y_temp, y_test = train_test_split(
        df['reviews'].values, df['label'].values,
        test_size=CONFIG['test_size'],
        random_state=CONFIG['seed'],
        stratify=df['label'].values
    )

    # Validation share expressed relative to the remaining train+val portion
    val_ratio = CONFIG['val_size'] / (CONFIG['train_size'] + CONFIG['val_size'])
    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp,
        test_size=val_ratio,
        random_state=CONFIG['seed'],
        stratify=y_temp
    )

    print(f"Train: {len(X_train):,} | Val: {len(X_val):,} | Test: {len(X_test):,}")
    print()

    del df
    gc.collect()

    # ============================================================
    # TOKENIZATION
    # ============================================================

    print("TOKENIZATION")
    print("-" * 70)

    tokenizer = AutoTokenizer.from_pretrained(CONFIG['model_name'], local_files_only=True)

    train_ids, train_masks = tokenize_batch(X_train, tokenizer, CONFIG['max_length'], "Train")
    val_ids, val_masks = tokenize_batch(X_val, tokenizer, CONFIG['max_length'], "Val")
    test_ids, test_masks = tokenize_batch(X_test, tokenizer, CONFIG['max_length'], "Test")

    train_labels = torch.tensor(y_train, dtype=torch.long)
    val_labels = torch.tensor(y_val, dtype=torch.long)
    test_labels = torch.tensor(y_test, dtype=torch.long)

    del X_train, X_val, X_test, y_train, y_val, y_test, X_temp, y_temp
    gc.collect()

    print()

    # ============================================================
    # DATALOADERS
    # ============================================================

    train_dataset = TensorDataset(train_ids, train_masks, train_labels)
    val_dataset = TensorDataset(val_ids, val_masks, val_labels)
    test_dataset = TensorDataset(test_ids, test_masks, test_labels)

    train_loader = DataLoader(
        train_dataset, batch_size=CONFIG['batch_size'], shuffle=True,
        num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
        persistent_workers=persistent_workers, drop_last=True
    )
    val_loader = DataLoader(
        val_dataset, batch_size=CONFIG['batch_size'], shuffle=False,
        num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
        persistent_workers=persistent_workers
    )
    test_loader = DataLoader(
        test_dataset, batch_size=CONFIG['batch_size'], shuffle=False,
        num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
        persistent_workers=persistent_workers
    )

    print(f"Train batches: {len(train_loader):,}")
    print()

    # ============================================================
    # MODEL (3 classes!)
    # ============================================================

    print("LOADING MODEL")
    print("-" * 70)

    model = AutoModelForSequenceClassification.from_pretrained(
        CONFIG['model_name'],
        num_labels=CONFIG['num_classes'],  # 3 classes!
        local_files_only=True
    )
    model = model.to(device)
    print(f"Model loaded with {CONFIG['num_classes']} output classes")
    print()

    # ============================================================
    # LOSS FUNCTION
    # ============================================================

    if class_weights is not None:
        class_weights = class_weights.to(device)

    criterion = FocalLoss(
        num_classes=CONFIG['num_classes'],
        gamma=CONFIG['focal_gamma'],
        alpha=class_weights,
        label_smoothing=CONFIG['label_smoothing']
    )

    print(f"Loss: Focal (γ={CONFIG['focal_gamma']}) + Label Smoothing ({CONFIG['label_smoothing']})")
    print()

    # ============================================================
    # OPTIMIZER & SCHEDULER
    # ============================================================

    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=CONFIG['learning_rate'],
        weight_decay=CONFIG['weight_decay']
    )

    # The scheduler is stepped once per optimizer step, i.e. once per
    # `accum_steps` micro-batches.
    accum_steps = CONFIG['gradient_accumulation_steps']
    steps_per_epoch = len(train_loader) // accum_steps
    total_steps = steps_per_epoch * CONFIG['epochs']
    warmup_steps = int(total_steps * CONFIG['warmup_ratio'])

    scheduler = get_scheduler(optimizer, CONFIG['scheduler_type'], total_steps, warmup_steps, CONFIG)
    scaler = torch.amp.GradScaler('cuda') if use_amp else None

    # ============================================================
    # TRAINING LOOP
    # ============================================================

    print("=" * 70)
    print("TRAINING")
    print("=" * 70)
    print()

    best_val_acc = 0
    best_val_loss = float('inf')
    patience_counter = 0
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'lr': []}
    total_train_time = 0

    try:
        for epoch in range(CONFIG['epochs']):
            epoch_start = time.time()

            # === TRAIN ===
            model.train()
            train_loss, train_correct, train_total = 0, 0, 0
            # NOTE: this also discards gradients of trailing micro-batches
            # from the previous epoch that never completed an accumulation
            # window (len(train_loader) not divisible by accum_steps).
            optimizer.zero_grad()

            pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [Train]", ncols=120)

            for batch_idx, (input_ids, attention_mask, labels) in enumerate(pbar):
                input_ids = input_ids.to(device, non_blocking=True)
                attention_mask = attention_mask.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

                # Loss is divided by accum_steps so the accumulated gradient
                # matches the average over the whole accumulation window.
                if use_amp:
                    with torch.amp.autocast('cuda'):
                        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                        loss = criterion(outputs.logits, labels) / accum_steps
                    scaler.scale(loss).backward()
                else:
                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                    loss = criterion(outputs.logits, labels) / accum_steps
                    loss.backward()

                if (batch_idx + 1) % accum_steps == 0:
                    if use_amp:
                        # Unscale first so clipping sees the true gradient norm
                        scaler.unscale_(optimizer)
                        torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])
                        scaler.step(optimizer)
                        scaler.update()
                    else:
                        torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])
                        optimizer.step()
                    scheduler.step()
                    optimizer.zero_grad()

                # Undo the accumulation scaling for reporting
                train_loss += loss.item() * accum_steps
                _, pred = outputs.logits.max(1)
                train_total += labels.size(0)
                train_correct += pred.eq(labels).sum().item()

                pbar.set_postfix({
                    'loss': f'{train_loss/(batch_idx+1):.4f}',
                    'acc': f'{100*train_correct/train_total:.1f}%'
                })

            train_loss /= len(train_loader)
            train_acc = 100 * train_correct / train_total

            # === VALIDATION ===
            model.eval()
            val_loss, val_correct, val_total = 0, 0, 0

            with torch.no_grad():
                for input_ids, attention_mask, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [Val]", ncols=120):
                    input_ids = input_ids.to(device, non_blocking=True)
                    attention_mask = attention_mask.to(device, non_blocking=True)
                    labels = labels.to(device, non_blocking=True)

                    if use_amp:
                        with torch.amp.autocast('cuda'):
                            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                            loss = criterion(outputs.logits, labels)
                    else:
                        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                        loss = criterion(outputs.logits, labels)

                    val_loss += loss.item()
                    _, pred = outputs.logits.max(1)
                    val_total += labels.size(0)
                    val_correct += pred.eq(labels).sum().item()

            val_loss /= len(val_loader)
            val_acc = 100 * val_correct / val_total

            epoch_time = time.time() - epoch_start
            total_train_time += epoch_time

            history['train_loss'].append(train_loss)
            history['train_acc'].append(train_acc)
            history['val_loss'].append(val_loss)
            history['val_acc'].append(val_acc)
            history['lr'].append(scheduler.get_last_lr()[0])

            # === EPOCH SUMMARY ===
            print()
            print(f" Epoch {epoch+1}: Train Loss={train_loss:.4f}, Acc={train_acc:.2f}% | Val Loss={val_loss:.4f}, Acc={val_acc:.2f}% | Time={epoch_time:.0f}s")

            # Checkpointing: "best" and early stopping are driven by val loss
            is_best = val_loss < best_val_loss
            if is_best:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1

            if val_acc > best_val_acc:
                best_val_acc = val_acc

            if CONFIG['checkpoint_every_epoch']:
                ckpt_path = os.path.join(CONFIG['checkpoint_dir'], f'checkpoint_epoch_{epoch+1}.pt')
                save_checkpoint(model, tokenizer, optimizer, scheduler, scaler,
                                epoch+1, val_acc, val_loss, history, CONFIG, ckpt_path, is_best)
                cleanup_old_checkpoints(CONFIG['checkpoint_dir'], CONFIG['save_total_limit'])

                # Only announce a save when one was actually performed
                if is_best:
                    print(" 🏆 New best model saved!")

            if CONFIG['early_stopping'] and patience_counter >= CONFIG['early_stopping_patience']:
                print(f"\n 🛑 Early stopping after {epoch+1} epochs")
                break

            print()

    except Exception as e:
        # Best-effort rescue of the current weights before propagating
        print(f"\n⚠️ Error: {e}")
        emergency_dir = CONFIG['output_dir'] + '_emergency'
        os.makedirs(emergency_dir, exist_ok=True)
        model.save_pretrained(emergency_dir)
        tokenizer.save_pretrained(emergency_dir)
        raise

    print("=" * 70)
    print(f"TRAINING COMPLETE - {total_train_time/60:.1f} minutes")
    print("=" * 70)
    print()

    # ============================================================
    # FINAL TEST EVALUATION
    # ============================================================

    print("FINAL TEST EVALUATION")
    print("-" * 70)

    # Reload from the output directory rather than reusing the in-memory
    # (last-epoch) weights.
    # NOTE(review): presumably save_checkpoint writes the best model there
    # when is_best is true - confirm against its implementation.
    model = AutoModelForSequenceClassification.from_pretrained(
        CONFIG['output_dir'], local_files_only=True
    )
    model = model.to(device)
    model.eval()

    all_preds, all_labels, all_probs = [], [], []

    with torch.no_grad():
        for input_ids, attention_mask, labels in tqdm(test_loader, desc="Testing"):
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            if use_amp:
                with torch.amp.autocast('cuda'):
                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            else:
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            probs = F.softmax(outputs.logits, dim=-1)
            _, preds = outputs.logits.max(1)

            all_preds.extend(preds.cpu().numpy())
            # labels were never moved to the device, so no .cpu() is needed
            all_labels.extend(labels.numpy())
            all_probs.extend(probs.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    all_probs = np.array(all_probs)

    test_acc = 100 * (all_preds == all_labels).mean()

    print()
    print(f"Test Accuracy: {test_acc:.2f}%")
    print()

    # Classification Report
    # `labels` is pinned so rows always line up with CONFIG['class_names'],
    # even if a class is absent from both the test labels and predictions.
    print("CLASSIFICATION REPORT")
    print("-" * 70)
    report = classification_report(
        all_labels, all_preds,
        labels=class_ids,
        target_names=CONFIG['class_names'],
        digits=3,
        output_dict=True
    )
    print(classification_report(
        all_labels, all_preds,
        labels=class_ids,
        target_names=CONFIG['class_names'],
        digits=3
    ))

    # ============================================================
    # TEACHER-FOCUSED METRICS
    # ============================================================

    print()
    print("=" * 70)
    print("📊 TEACHER-FOCUSED METRICS")
    print("=" * 70)
    print()

    # Negative class recall (MOST IMPORTANT for teachers)
    negative_recall = report[negative_name]['recall'] * 100
    negative_precision = report[negative_name]['precision'] * 100

    print(" 🔴 NEGATIVE FEEDBACK DETECTION (Struggling Students):")
    print(f" Recall: {negative_recall:.1f}% ← {negative_recall:.0f}% of struggling students caught")
    print(f" Precision: {negative_precision:.1f}% ← {negative_precision:.0f}% of flags are real issues")
    print()

    # False negative analysis (missed struggling students)
    false_negatives = ((all_labels == 0) & (all_preds != 0)).sum()
    total_negatives = (all_labels == 0).sum()
    # Guard against a test split that contains no negative samples
    missed_pct = 100 * false_negatives / total_negatives if total_negatives else 0.0

    print(" ⚠️ MISSED STRUGGLING STUDENTS:")
    print(f" {false_negatives:,} of {total_negatives:,} negative cases missed ({missed_pct:.1f}%)")
    print()

    # Confidence analysis: top softmax probability per prediction
    pred_confidence = all_probs.max(axis=1)
    low_confidence = (pred_confidence < 0.7).sum()
    low_conf_pct = 100 * low_confidence / len(pred_confidence)

    print(" 🤔 UNCERTAIN PREDICTIONS (confidence < 70%):")
    print(f" {low_confidence:,} of {len(pred_confidence):,} predictions ({low_conf_pct:.1f}%)")
    print(" → These should be flagged for manual review")
    print()

    # ============================================================
    # PLOTS
    # ============================================================

    # Confusion Matrix
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=CONFIG['class_names'],
                yticklabels=CONFIG['class_names'], ax=axes[0])
    axes[0].set_xlabel('Predicted')
    axes[0].set_ylabel('Actual')
    axes[0].set_title('Confusion Matrix (Counts)')

    # Row-normalise (per-class recall); empty rows stay at 0 instead of NaN
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_norm = cm.astype(float) / np.where(row_totals == 0, 1, row_totals)
    sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
                xticklabels=CONFIG['class_names'],
                yticklabels=CONFIG['class_names'], ax=axes[1])
    axes[1].set_xlabel('Predicted')
    axes[1].set_ylabel('Actual')
    axes[1].set_title('Confusion Matrix (Recall)')

    plt.tight_layout()
    plt.savefig('plots/confusion_matrix_3class.png', dpi=150)
    plt.close(fig)  # release the figure once it is on disk
    print("✓ Saved: plots/confusion_matrix_3class.png")

    # Per-class metrics
    fig, ax = plt.subplots(figsize=(10, 6))
    x = np.arange(3)
    width = 0.25

    recalls = [report[c]['recall']*100 for c in CONFIG['class_names']]
    precisions = [report[c]['precision']*100 for c in CONFIG['class_names']]
    f1s = [report[c]['f1-score']*100 for c in CONFIG['class_names']]

    bars1 = ax.bar(x - width, recalls, width, label='Recall', color='#e74c3c')
    bars2 = ax.bar(x, precisions, width, label='Precision', color='#3498db')
    bars3 = ax.bar(x + width, f1s, width, label='F1-Score', color='#2ecc71')

    ax.set_ylabel('Score (%)')
    ax.set_title('Per-Class Metrics (3-Class Model)')
    ax.set_xticks(x)
    ax.set_xticklabels(['🔴 Negative\n(Needs Attention)', '🟡 Neutral\n(Mixed)', '🟢 Positive\n(Satisfied)'])
    ax.legend()
    ax.set_ylim(0, 105)
    ax.axhline(y=80, color='gray', linestyle='--', alpha=0.5)

    # Print the value above every bar
    for bars in [bars1, bars2, bars3]:
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.0f}%', xy=(bar.get_x() + bar.get_width()/2, height),
                        xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=9)

    plt.tight_layout()
    plt.savefig('plots/per_class_metrics_3class.png', dpi=150)
    plt.close(fig)
    print("✓ Saved: plots/per_class_metrics_3class.png")

    # Training history
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    epochs_range = range(1, len(history['train_loss']) + 1)

    axes[0].plot(epochs_range, history['train_loss'], 'b-o', label='Train')
    axes[0].plot(epochs_range, history['val_loss'], 'r-o', label='Val')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].set_title('Training Loss')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    axes[1].plot(epochs_range, history['train_acc'], 'b-o', label='Train')
    axes[1].plot(epochs_range, history['val_acc'], 'r-o', label='Val')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Accuracy (%)')
    axes[1].set_title('Training Accuracy')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('plots/training_history_3class.png', dpi=150)
    plt.close(fig)
    print("✓ Saved: plots/training_history_3class.png")

    # ============================================================
    # SAVE RESULTS
    # ============================================================

    results = {
        'test_accuracy': test_acc,
        'negative_recall': negative_recall,
        'negative_precision': negative_precision,
        'missed_struggling_students': int(false_negatives),
        'total_negative_cases': int(total_negatives),
        'low_confidence_predictions': int(low_confidence),
        'config': CONFIG,
        'classification_report': report,
        'training_time_minutes': total_train_time / 60,
    }

    torch.save(results, os.path.join(CONFIG['output_dir'], 'results.pt'))

    # The JSON summary omits the bulky config and full report
    save_results = {k: v for k, v in results.items() if k not in ['config', 'classification_report']}
    save_results['per_class_recall'] = {c: report[c]['recall'] for c in CONFIG['class_names']}
    with open(os.path.join(CONFIG['output_dir'], 'results.json'), 'w') as f:
        json.dump(save_results, f, indent=2)

    # ============================================================
    # FINAL SUMMARY
    # ============================================================

    print()
    print("=" * 70)
    print("🎉 TRAINING COMPLETE!")
    print("=" * 70)
    print()
    print(f" Model saved to: {CONFIG['output_dir']}/")
    print()
    print(" RESULTS:")
    print(f" Test Accuracy: {test_acc:.1f}%")
    print(f" Negative Recall: {negative_recall:.1f}% ← Catches {negative_recall:.0f}% of struggling students")
    print(f" Negative Precision: {negative_precision:.1f}%")
    print()
    print(" PER-CLASS RECALL:")
    for name in CONFIG['class_names']:
        recall = report[name]['recall'] * 100
        emoji = '🔴' if name == 'Negative' else ('🟡' if name == 'Neutral' else '🟢')
        print(f" {emoji} {name}: {recall:.1f}%")
    print()
    print("=" * 70)
|
| 869 |
+
|
| 870 |
+
|
| 871 |
+
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
dropout_binaryclass/correlation.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding: utf-8
|
| 3 |
+
"""
|
| 4 |
+
Feature Correlation Analysis
|
| 5 |
+
Helps identify redundant features and features most correlated with Target.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import numpy as np
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
import seaborn as sns
|
| 12 |
+
|
| 13 |
+
# =============================================================================
# 1. LOAD DATA
# =============================================================================

# The CSV is semicolon-delimited. Rows labelled 'Enrolled' are removed so that
# only the two outcomes mapped below remain. The explicit .copy() detaches the
# filtered frame from the one returned by read_csv, so the column assignment
# that follows writes to an independent frame instead of triggering pandas'
# chained-assignment (SettingWithCopy) warning.
df = pd.read_csv('data.csv', sep=';')
df = df[df['Target'] != 'Enrolled'].copy()

# Encode the target numerically: Dropout -> 0, Graduate -> 1.
df['Target'] = df['Target'].map({'Dropout': 0, 'Graduate': 1})

print(f"Dataset shape: {df.shape}")
# Every column except 'Target' is counted as a feature.
print(f"Features: {df.shape[1] - 1}")
|
| 23 |
+
|
| 24 |
+
# =============================================================================
# 2. CORRELATION WITH TARGET
# =============================================================================

print("\n" + "="*70)
print("CORRELATION WITH TARGET (Dropout=0, Graduate=1)")
print("="*70)

# Correlation of every column with Target, with Target's self-correlation
# removed, ordered from largest to smallest absolute value.
target_corr = df.corr()['Target'].drop('Target').sort_values(key=abs, ascending=False)

print("\nAll features ranked by absolute correlation with Target:\n")
for i, (feature, corr) in enumerate(target_corr.items(), 1):
    # Bucket by magnitude: > 0.3 strong, > 0.15 moderate, otherwise weak.
    strength = "STRONG" if abs(corr) > 0.3 else "MODERATE" if abs(corr) > 0.15 else "WEAK"
    print(f"{i:2d}. {feature:50s} {corr:+.4f} [{strength}]")

# Plot correlation with target: green bars for positive values, red otherwise.
plt.figure(figsize=(12, 10))
colors = ['green' if c > 0 else 'red' for c in target_corr.values]
target_corr.plot(kind='barh', color=colors)
plt.title('Feature Correlation with Target (Graduate=1)')
plt.xlabel('Correlation Coefficient')
plt.axvline(x=0, color='black', linewidth=0.5)
plt.axvline(x=0.3, color='blue', linestyle='--', alpha=0.5, label='Strong threshold')
plt.axvline(x=-0.3, color='blue', linestyle='--', alpha=0.5)
# Without an explicit legend call the 'Strong threshold' label is never drawn.
plt.legend()
plt.tight_layout()
plt.savefig('correlation_with_target.png', dpi=150)
plt.show()
|
| 52 |
+
|
| 53 |
+
# =============================================================================
# 3. FEATURE-TO-FEATURE CORRELATION (Find Redundant Features)
# =============================================================================

print("\n" + "="*70)
print("HIGHLY CORRELATED FEATURE PAIRS (Potential Redundancy)")
print("="*70)

# Correlation matrix of the features only (Target excluded).
corr_matrix = df.drop('Target', axis=1).corr()

# Collect every unordered feature pair whose |correlation| reaches the
# threshold. Only the upper triangle (j > i) is scanned so each pair is
# reported once and the diagonal is skipped.
high_corr_pairs = []
threshold = 0.7

feature_names = list(corr_matrix.columns)
for i, first_name in enumerate(feature_names):
    for j in range(i + 1, len(feature_names)):
        corr_value = corr_matrix.iloc[i, j]
        if abs(corr_value) >= threshold:
            high_corr_pairs.append({
                'Feature 1': first_name,
                'Feature 2': feature_names[j],
                'Correlation': corr_value
            })

# Naming the columns explicitly keeps the frame well-formed when no pair
# qualifies; a column-less frame would make sort_values('Correlation') raise
# KeyError and the "no pairs" branch below could never run.
high_corr_df = pd.DataFrame(high_corr_pairs, columns=['Feature 1', 'Feature 2', 'Correlation'])
if not high_corr_df.empty:
    high_corr_df = high_corr_df.sort_values('Correlation', key=abs, ascending=False)

print(f"\nFeature pairs with correlation >= {threshold}:\n")
if len(high_corr_df) > 0:
    for _, row in high_corr_df.iterrows():
        print(f" {row['Correlation']:+.4f} | {row['Feature 1']}")
        print(f" | {row['Feature 2']}")
        print()
else:
    print(" No highly correlated pairs found.")
|
| 88 |
+
|
| 89 |
+
# =============================================================================
# 4. CORRELATION HEATMAP
# =============================================================================

# Draw the feature-to-feature correlation matrix as an annotated heatmap,
# save it to disk and display it.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(20, 16))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=0.5,
    annot_kws={'size': 6},
    ax=heatmap_ax,
)
heatmap_ax.set_title('Feature Correlation Matrix')
heatmap_fig.tight_layout()
heatmap_fig.savefig('correlation_matrix.png', dpi=150)
plt.show()
|
| 101 |
+
|
| 102 |
+
# =============================================================================
# 5. RECOMMENDATIONS FOR FEATURE SELECTION
# =============================================================================

print("\n" + "="*70)
print("FEATURE SELECTION RECOMMENDATIONS")
print("="*70)

# Removal candidates: features whose |correlation with Target| is below
# weak_threshold.
weak_threshold = 0.05
weak_features = target_corr[target_corr.abs() < weak_threshold]

print(f"\n1. WEAK CORRELATION WITH TARGET (|corr| < {weak_threshold}):")
print(" Consider removing these - they may not help prediction:\n")
for weak_name, weak_value in weak_features.items():
    print(f" - {weak_name}: {weak_value:+.4f}")

# Features worth keeping: |correlation with Target| at or above
# strong_threshold.
strong_threshold = 0.2
strong_features = target_corr[target_corr.abs() >= strong_threshold]

print(f"\n2. STRONG CORRELATION WITH TARGET (|corr| >= {strong_threshold}):")
print(" Keep these - they are predictive:\n")
for strong_name, strong_value in strong_features.items():
    print(f" + {strong_name}: {strong_value:+.4f}")

# For each highly inter-correlated pair, recommend keeping the member with the
# larger |correlation with Target|; on a tie the first member is kept.
print(f"\n3. REDUNDANT FEATURES (correlated with each other >= {threshold}):")
print(" Consider keeping only one from each pair:\n")
for first, second in zip(high_corr_df['Feature 1'], high_corr_df['Feature 2']):
    corr1 = abs(target_corr.get(first, 0))
    corr2 = abs(target_corr.get(second, 0))
    if corr1 >= corr2:
        keep, drop = first, second
    else:
        keep, drop = second, first
    print(f" KEEP: {keep} (target corr: {target_corr.get(keep, 0):+.4f})")
    print(f" DROP: {drop} (target corr: {target_corr.get(drop, 0):+.4f})")
    print()
|
| 140 |
+
|
| 141 |
+
# =============================================================================
# 6. SUGGESTED FEATURES TO DROP
# =============================================================================

print("\n" + "="*70)
print("SUGGESTED FEATURES TO DROP")
print("="*70)

# Start from the features that are weakly correlated with Target.
features_to_drop = set(weak_features.index)

# From each highly correlated pair, drop the member that is less correlated
# with Target (the second member on a tie). These are tracked separately so
# the "redundant" reason is reported only for features that actually lost a
# pair comparison, not for every feature that merely appears in some pair.
redundant_drops = set()
for _, row in high_corr_df.iterrows():
    corr1 = abs(target_corr.get(row['Feature 1'], 0))
    corr2 = abs(target_corr.get(row['Feature 2'], 0))
    drop = row['Feature 2'] if corr1 >= corr2 else row['Feature 1']
    redundant_drops.add(drop)
features_to_drop |= redundant_drops

# Report in target_corr order (strongest |correlation| first) so the output is
# identical from run to run; iterating the set directly is not, because string
# hashing is randomized per process.
drop_report_order = [name for name in target_corr.index if name in features_to_drop]

print(f"\nBased on analysis, consider dropping these {len(features_to_drop)} features:\n")
for f in drop_report_order:
    reason = []
    if f in weak_features.index:
        reason.append(f"weak target corr ({target_corr[f]:+.4f})")
    if f in redundant_drops:
        reason.append("redundant with another feature")
    print(f" - {f}")
    print(f" Reason: {', '.join(reason)}")

# Features to keep: everything not selected for removal, in target_corr order.
features_to_keep = [f for f in target_corr.index if f not in features_to_drop]

print(f"\nKeep these {len(features_to_keep)} features:\n")
for f in features_to_keep:
    print(f" + {f} (target corr: {target_corr[f]:+.4f})")
|
| 179 |
+
|
| 180 |
+
# =============================================================================
# 7. GENERATE CODE SNIPPET
# =============================================================================

print("\n" + "="*70)
print("CODE SNIPPET FOR YOUR TRAINING SCRIPT")
print("="*70)

print("\n# Copy this to your training script:")
# features_to_drop is a set, whose iteration order can differ between runs;
# sorting makes the emitted snippet stable and easy to diff.
print(f"columns_to_drop = {sorted(features_to_drop)}")
|
| 190 |
+
|
| 191 |
+
# =============================================================================
# 8. SAVE ANALYSIS RESULTS
# =============================================================================

# The two PNG files were already written by the plotting sections above; they
# are listed here so the final summary covers every artifact of the run.
saved_files = ['correlation_with_target.png', 'correlation_matrix.png']

# Save correlation with target
target_corr.to_csv('target_correlations.csv', header=['correlation'])
saved_files.append('target_correlations.csv')

# Save high correlation pairs (skipped when no pair met the threshold)
if len(high_corr_df) > 0:
    high_corr_df.to_csv('redundant_feature_pairs.csv', index=False)
    saved_files.append('redundant_feature_pairs.csv')

# Save recommendations. The encoding is pinned so the file does not depend on
# the platform default, and the drop list is sorted because features_to_drop
# is a set whose iteration order is not stable across runs.
with open('feature_selection_recommendations.txt', 'w', encoding='utf-8') as f:
    f.write("FEATURE SELECTION RECOMMENDATIONS\n")
    f.write("="*50 + "\n\n")
    f.write(f"Features to DROP ({len(features_to_drop)}):\n")
    for feat in sorted(features_to_drop):
        f.write(f" - {feat}\n")
    f.write(f"\nFeatures to KEEP ({len(features_to_keep)}):\n")
    for feat in features_to_keep:
        f.write(f" + {feat}\n")
saved_files.append('feature_selection_recommendations.txt')

# Only report files that were actually written during this run.
print("\nFiles saved:")
for number, filename in enumerate(saved_files, 1):
    print(f" {number}. {filename}")
|
dropout_binaryclass/data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dropout_binaryclass/feature_importance.png
ADDED
|
dropout_binaryclass/feature_selection_recommendations.txt
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FEATURE SELECTION RECOMMENDATIONS
|
| 2 |
+
==================================================
|
| 3 |
+
|
| 4 |
+
Features to DROP (17):
|
| 5 |
+
- Curricular units 2nd sem (enrolled)
|
| 6 |
+
- Curricular units 1st sem (credited)
|
| 7 |
+
- Nacionality
|
| 8 |
+
- Mother's occupation
|
| 9 |
+
- Curricular units 1st sem (approved)
|
| 10 |
+
- Educational special needs
|
| 11 |
+
- Inflation rate
|
| 12 |
+
- International
|
| 13 |
+
- Curricular units 2nd sem (credited)
|
| 14 |
+
- Curricular units 2nd sem (grade)
|
| 15 |
+
- Course
|
| 16 |
+
- Curricular units 1st sem (enrolled)
|
| 17 |
+
- Father's occupation
|
| 18 |
+
- Curricular units 1st sem (evaluations)
|
| 19 |
+
- Curricular units 1st sem (grade)
|
| 20 |
+
- Father's qualification
|
| 21 |
+
- Unemployment rate
|
| 22 |
+
|
| 23 |
+
Features to KEEP (19):
|
| 24 |
+
+ Curricular units 2nd sem (approved)
|
| 25 |
+
+ Tuition fees up to date
|
| 26 |
+
+ Scholarship holder
|
| 27 |
+
+ Age at enrollment
|
| 28 |
+
+ Debtor
|
| 29 |
+
+ Gender
|
| 30 |
+
+ Application mode
|
| 31 |
+
+ Admission grade
|
| 32 |
+
+ Displaced
|
| 33 |
+
+ Curricular units 2nd sem (evaluations)
|
| 34 |
+
+ Previous qualification (grade)
|
| 35 |
+
+ Curricular units 2nd sem (without evaluations)
|
| 36 |
+
+ Marital status
|
| 37 |
+
+ Application order
|
| 38 |
+
+ Daytime/evening attendance
|
| 39 |
+
+ Curricular units 1st sem (without evaluations)
|
| 40 |
+
+ Previous qualification
|
| 41 |
+
+ Mother's qualification
|
| 42 |
+
+ GDP
|
dropout_binaryclass/model_config.json
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "Student Dropout Prediction Model",
|
| 3 |
+
"model_type": "LogisticRegression with StandardScaler",
|
| 4 |
+
"target_mapping": {
|
| 5 |
+
"0": "Dropout",
|
| 6 |
+
"1": "Graduate"
|
| 7 |
+
},
|
| 8 |
+
"features": [
|
| 9 |
+
"Marital status",
|
| 10 |
+
"Application mode",
|
| 11 |
+
"Application order",
|
| 12 |
+
"Course",
|
| 13 |
+
"Daytime/evening attendance\t",
|
| 14 |
+
"Previous qualification",
|
| 15 |
+
"Previous qualification (grade)",
|
| 16 |
+
"Nacionality",
|
| 17 |
+
"Mother's qualification",
|
| 18 |
+
"Father's qualification",
|
| 19 |
+
"Mother's occupation",
|
| 20 |
+
"Admission grade",
|
| 21 |
+
"Displaced",
|
| 22 |
+
"Educational special needs",
|
| 23 |
+
"Debtor",
|
| 24 |
+
"Tuition fees up to date",
|
| 25 |
+
"Gender",
|
| 26 |
+
"Scholarship holder",
|
| 27 |
+
"Age at enrollment",
|
| 28 |
+
"International",
|
| 29 |
+
"Curricular units 1st sem (credited)",
|
| 30 |
+
"Curricular units 1st sem (enrolled)",
|
| 31 |
+
"Curricular units 1st sem (evaluations)",
|
| 32 |
+
"Curricular units 1st sem (approved)",
|
| 33 |
+
"Curricular units 1st sem (grade)",
|
| 34 |
+
"Curricular units 1st sem (without evaluations)",
|
| 35 |
+
"Curricular units 2nd sem (evaluations)",
|
| 36 |
+
"Curricular units 2nd sem (grade)",
|
| 37 |
+
"Curricular units 2nd sem (without evaluations)",
|
| 38 |
+
"Unemployment rate",
|
| 39 |
+
"Inflation rate",
|
| 40 |
+
"GDP"
|
| 41 |
+
],
|
| 42 |
+
"num_features": 32,
|
| 43 |
+
"dropped_columns": [
|
| 44 |
+
"Father's occupation",
|
| 45 |
+
"Curricular units 2nd sem (credited)",
|
| 46 |
+
"Curricular units 2nd sem (enrolled)",
|
| 47 |
+
"Curricular units 2nd sem (approved)"
|
| 48 |
+
],
|
| 49 |
+
"feature_details": {
|
| 50 |
+
"Marital status": {
|
| 51 |
+
"dtype": "int64",
|
| 52 |
+
"min": 1.0,
|
| 53 |
+
"max": 6.0,
|
| 54 |
+
"mean": 1.184297520661157,
|
| 55 |
+
"example_value": 1
|
| 56 |
+
},
|
| 57 |
+
"Application mode": {
|
| 58 |
+
"dtype": "int64",
|
| 59 |
+
"min": 1.0,
|
| 60 |
+
"max": 57.0,
|
| 61 |
+
"mean": 18.421763085399448,
|
| 62 |
+
"example_value": 17
|
| 63 |
+
},
|
| 64 |
+
"Application order": {
|
| 65 |
+
"dtype": "int64",
|
| 66 |
+
"min": 0.0,
|
| 67 |
+
"max": 6.0,
|
| 68 |
+
"mean": 1.750137741046832,
|
| 69 |
+
"example_value": 5
|
| 70 |
+
},
|
| 71 |
+
"Course": {
|
| 72 |
+
"dtype": "int64",
|
| 73 |
+
"min": 33.0,
|
| 74 |
+
"max": 9991.0,
|
| 75 |
+
"mean": 8853.980991735538,
|
| 76 |
+
"example_value": 171
|
| 77 |
+
},
|
| 78 |
+
"Daytime/evening attendance\t": {
|
| 79 |
+
"dtype": "int64",
|
| 80 |
+
"min": 0.0,
|
| 81 |
+
"max": 1.0,
|
| 82 |
+
"mean": 0.8876033057851239,
|
| 83 |
+
"example_value": 1
|
| 84 |
+
},
|
| 85 |
+
"Previous qualification": {
|
| 86 |
+
"dtype": "int64",
|
| 87 |
+
"min": 1.0,
|
| 88 |
+
"max": 43.0,
|
| 89 |
+
"mean": 4.532231404958678,
|
| 90 |
+
"example_value": 1
|
| 91 |
+
},
|
| 92 |
+
"Previous qualification (grade)": {
|
| 93 |
+
"dtype": "int64",
|
| 94 |
+
"min": 95.0,
|
| 95 |
+
"max": 190.0,
|
| 96 |
+
"mean": 132.90881542699725,
|
| 97 |
+
"example_value": 122
|
| 98 |
+
},
|
| 99 |
+
"Nacionality": {
|
| 100 |
+
"dtype": "int64",
|
| 101 |
+
"min": 1.0,
|
| 102 |
+
"max": 109.0,
|
| 103 |
+
"mean": 1.828099173553719,
|
| 104 |
+
"example_value": 1
|
| 105 |
+
},
|
| 106 |
+
"Mother's qualification": {
|
| 107 |
+
"dtype": "int64",
|
| 108 |
+
"min": 1.0,
|
| 109 |
+
"max": 44.0,
|
| 110 |
+
"mean": 19.986225895316803,
|
| 111 |
+
"example_value": 19
|
| 112 |
+
},
|
| 113 |
+
"Father's qualification": {
|
| 114 |
+
"dtype": "int64",
|
| 115 |
+
"min": 1.0,
|
| 116 |
+
"max": 44.0,
|
| 117 |
+
"mean": 22.57162534435262,
|
| 118 |
+
"example_value": 12
|
| 119 |
+
},
|
| 120 |
+
"Mother's occupation": {
|
| 121 |
+
"dtype": "int64",
|
| 122 |
+
"min": 0.0,
|
| 123 |
+
"max": 194.0,
|
| 124 |
+
"mean": 10.138567493112948,
|
| 125 |
+
"example_value": 5
|
| 126 |
+
},
|
| 127 |
+
"Admission grade": {
|
| 128 |
+
"dtype": "int64",
|
| 129 |
+
"min": 95.0,
|
| 130 |
+
"max": 190.0,
|
| 131 |
+
"mean": 127.28870523415978,
|
| 132 |
+
"example_value": 127
|
| 133 |
+
},
|
| 134 |
+
"Displaced": {
|
| 135 |
+
"dtype": "int64",
|
| 136 |
+
"min": 0.0,
|
| 137 |
+
"max": 1.0,
|
| 138 |
+
"mean": 0.5490358126721763,
|
| 139 |
+
"example_value": 1
|
| 140 |
+
},
|
| 141 |
+
"Educational special needs": {
|
| 142 |
+
"dtype": "int64",
|
| 143 |
+
"min": 0.0,
|
| 144 |
+
"max": 1.0,
|
| 145 |
+
"mean": 0.011019283746556474,
|
| 146 |
+
"example_value": 0
|
| 147 |
+
},
|
| 148 |
+
"Debtor": {
|
| 149 |
+
"dtype": "int64",
|
| 150 |
+
"min": 0.0,
|
| 151 |
+
"max": 1.0,
|
| 152 |
+
"mean": 0.1137741046831956,
|
| 153 |
+
"example_value": 0
|
| 154 |
+
},
|
| 155 |
+
"Tuition fees up to date": {
|
| 156 |
+
"dtype": "int64",
|
| 157 |
+
"min": 0.0,
|
| 158 |
+
"max": 1.0,
|
| 159 |
+
"mean": 0.8661157024793389,
|
| 160 |
+
"example_value": 1
|
| 161 |
+
},
|
| 162 |
+
"Gender": {
|
| 163 |
+
"dtype": "int64",
|
| 164 |
+
"min": 0.0,
|
| 165 |
+
"max": 1.0,
|
| 166 |
+
"mean": 0.3440771349862259,
|
| 167 |
+
"example_value": 1
|
| 168 |
+
},
|
| 169 |
+
"Scholarship holder": {
|
| 170 |
+
"dtype": "int64",
|
| 171 |
+
"min": 0.0,
|
| 172 |
+
"max": 1.0,
|
| 173 |
+
"mean": 0.26694214876033057,
|
| 174 |
+
"example_value": 0
|
| 175 |
+
},
|
| 176 |
+
"Age at enrollment": {
|
| 177 |
+
"dtype": "int64",
|
| 178 |
+
"min": 17.0,
|
| 179 |
+
"max": 70.0,
|
| 180 |
+
"mean": 23.461157024793387,
|
| 181 |
+
"example_value": 20
|
| 182 |
+
},
|
| 183 |
+
"International": {
|
| 184 |
+
"dtype": "int64",
|
| 185 |
+
"min": 0.0,
|
| 186 |
+
"max": 1.0,
|
| 187 |
+
"mean": 0.023691460055096418,
|
| 188 |
+
"example_value": 0
|
| 189 |
+
},
|
| 190 |
+
"Curricular units 1st sem (credited)": {
|
| 191 |
+
"dtype": "int64",
|
| 192 |
+
"min": 0.0,
|
| 193 |
+
"max": 20.0,
|
| 194 |
+
"mean": 0.7542699724517906,
|
| 195 |
+
"example_value": 0
|
| 196 |
+
},
|
| 197 |
+
"Curricular units 1st sem (enrolled)": {
|
| 198 |
+
"dtype": "int64",
|
| 199 |
+
"min": 0.0,
|
| 200 |
+
"max": 26.0,
|
| 201 |
+
"mean": 6.337465564738292,
|
| 202 |
+
"example_value": 0
|
| 203 |
+
},
|
| 204 |
+
"Curricular units 1st sem (evaluations)": {
|
| 205 |
+
"dtype": "int64",
|
| 206 |
+
"min": 0.0,
|
| 207 |
+
"max": 45.0,
|
| 208 |
+
"mean": 8.071074380165289,
|
| 209 |
+
"example_value": 0
|
| 210 |
+
},
|
| 211 |
+
"Curricular units 1st sem (approved)": {
|
| 212 |
+
"dtype": "int64",
|
| 213 |
+
"min": 0.0,
|
| 214 |
+
"max": 26.0,
|
| 215 |
+
"mean": 4.791460055096419,
|
| 216 |
+
"example_value": 0
|
| 217 |
+
},
|
| 218 |
+
"Curricular units 1st sem (grade)": {
|
| 219 |
+
"dtype": "int64",
|
| 220 |
+
"min": 0.0,
|
| 221 |
+
"max": 19.0,
|
| 222 |
+
"mean": 10.539118457300276,
|
| 223 |
+
"example_value": 0
|
| 224 |
+
},
|
| 225 |
+
"Curricular units 1st sem (without evaluations)": {
|
| 226 |
+
"dtype": "int64",
|
| 227 |
+
"min": 0.0,
|
| 228 |
+
"max": 12.0,
|
| 229 |
+
"mean": 0.12892561983471074,
|
| 230 |
+
"example_value": 0
|
| 231 |
+
},
|
| 232 |
+
"Curricular units 2nd sem (evaluations)": {
|
| 233 |
+
"dtype": "int64",
|
| 234 |
+
"min": 0.0,
|
| 235 |
+
"max": 33.0,
|
| 236 |
+
"mean": 7.763085399449036,
|
| 237 |
+
"example_value": 0
|
| 238 |
+
},
|
| 239 |
+
"Curricular units 2nd sem (grade)": {
|
| 240 |
+
"dtype": "int64",
|
| 241 |
+
"min": 0.0,
|
| 242 |
+
"max": 19.0,
|
| 243 |
+
"mean": 10.038842975206611,
|
| 244 |
+
"example_value": 0
|
| 245 |
+
},
|
| 246 |
+
"Curricular units 2nd sem (without evaluations)": {
|
| 247 |
+
"dtype": "int64",
|
| 248 |
+
"min": 0.0,
|
| 249 |
+
"max": 12.0,
|
| 250 |
+
"mean": 0.14214876033057852,
|
| 251 |
+
"example_value": 0
|
| 252 |
+
},
|
| 253 |
+
"Unemployment rate": {
|
| 254 |
+
"dtype": "int64",
|
| 255 |
+
"min": 8.0,
|
| 256 |
+
"max": 16.0,
|
| 257 |
+
"mean": 11.682920110192837,
|
| 258 |
+
"example_value": 11
|
| 259 |
+
},
|
| 260 |
+
"Inflation rate": {
|
| 261 |
+
"dtype": "int64",
|
| 262 |
+
"min": -1.0,
|
| 263 |
+
"max": 4.0,
|
| 264 |
+
"mean": 1.215702479338843,
|
| 265 |
+
"example_value": 1
|
| 266 |
+
},
|
| 267 |
+
"GDP": {
|
| 268 |
+
"dtype": "int64",
|
| 269 |
+
"min": -4.0,
|
| 270 |
+
"max": 4.0,
|
| 271 |
+
"mean": 0.0418732782369146,
|
| 272 |
+
"example_value": 2
|
| 273 |
+
}
|
| 274 |
+
},
|
| 275 |
+
"model_performance": {
|
| 276 |
+
"avg_roc_auc": 0.9426,
|
| 277 |
+
"std_roc_auc": 0.0022,
|
| 278 |
+
"avg_accuracy": 0.8904,
|
| 279 |
+
"std_accuracy": 0.0123
|
| 280 |
+
},
|
| 281 |
+
"feature_importance": [
|
| 282 |
+
{
|
| 283 |
+
"feature": "Curricular units 1st sem (approved)",
|
| 284 |
+
"coefficient": 3.3163108538242474
|
| 285 |
+
},
|
| 286 |
+
{
|
| 287 |
+
"feature": "Curricular units 2nd sem (grade)",
|
| 288 |
+
"coefficient": 1.5439405534216617
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"feature": "Curricular units 1st sem (enrolled)",
|
| 292 |
+
"coefficient": -1.1411938218498847
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"feature": "Tuition fees up to date",
|
| 296 |
+
"coefficient": 0.9630826567928356
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"feature": "Curricular units 1st sem (credited)",
|
| 300 |
+
"coefficient": -0.8539015768167176
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"feature": "Curricular units 2nd sem (evaluations)",
|
| 304 |
+
"coefficient": -0.6369395746417482
|
| 305 |
+
},
|
| 306 |
+
{
|
| 307 |
+
"feature": "Course",
|
| 308 |
+
"coefficient": -0.6055334597267776
|
| 309 |
+
},
|
| 310 |
+
{
|
| 311 |
+
"feature": "International",
|
| 312 |
+
"coefficient": 0.4993629811863151
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"feature": "Curricular units 1st sem (grade)",
|
| 316 |
+
"coefficient": -0.4580579977450427
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"feature": "Debtor",
|
| 320 |
+
"coefficient": -0.3870319293027283
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"feature": "Nacionality",
|
| 324 |
+
"coefficient": -0.36386269065696214
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"feature": "Scholarship holder",
|
| 328 |
+
"coefficient": 0.3601197899922311
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"feature": "Age at enrollment",
|
| 332 |
+
"coefficient": -0.29681419535938647
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"feature": "Gender",
|
| 336 |
+
"coefficient": -0.22961088968596147
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"feature": "Mother's occupation",
|
| 340 |
+
"coefficient": 0.20867097544620444
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"feature": "Displaced",
|
| 344 |
+
"coefficient": -0.19965059186513248
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"feature": "Curricular units 1st sem (without evaluations)",
|
| 348 |
+
"coefficient": 0.1878768453143166
|
| 349 |
+
},
|
| 350 |
+
{
|
| 351 |
+
"feature": "Previous qualification",
|
| 352 |
+
"coefficient": 0.1635268539723628
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"feature": "Application mode",
|
| 356 |
+
"coefficient": -0.13952867123465623
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"feature": "Curricular units 1st sem (evaluations)",
|
| 360 |
+
"coefficient": 0.13005849075063863
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"feature": "Unemployment rate",
|
| 364 |
+
"coefficient": -0.12395327972323616
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"feature": "Curricular units 2nd sem (without evaluations)",
|
| 368 |
+
"coefficient": 0.11533489424236375
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"feature": "Father's qualification",
|
| 372 |
+
"coefficient": 0.10277051413826378
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"feature": "GDP",
|
| 376 |
+
"coefficient": -0.09145115697113011
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"feature": "Daytime/evening attendance\t",
|
| 380 |
+
"coefficient": -0.08582769046990661
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"feature": "Marital status",
|
| 384 |
+
"coefficient": 0.07586210175822407
|
| 385 |
+
},
|
| 386 |
+
{
|
| 387 |
+
"feature": "Previous qualification (grade)",
|
| 388 |
+
"coefficient": -0.07382604570456465
|
| 389 |
+
},
|
| 390 |
+
{
|
| 391 |
+
"feature": "Admission grade",
|
| 392 |
+
"coefficient": 0.06636622661157908
|
| 393 |
+
},
|
| 394 |
+
{
|
| 395 |
+
"feature": "Mother's qualification",
|
| 396 |
+
"coefficient": -0.05960602912137761
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"feature": "Application order",
|
| 400 |
+
"coefficient": -0.02756430990311611
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"feature": "Inflation rate",
|
| 404 |
+
"coefficient": 0.0016776856356872146
|
| 405 |
+
},
|
| 406 |
+
{
|
| 407 |
+
"feature": "Educational special needs",
|
| 408 |
+
"coefficient": -0.0004318043811183271
|
| 409 |
+
}
|
| 410 |
+
]
|
| 411 |
+
}
|
dropout_binaryclass/predict_students_dropout_and_academic_success_model.pkl
ADDED
|
File without changes
|
dropout_binaryclass/redundant_feature_pairs.csv
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature 1,Feature 2,Correlation
|
| 2 |
+
Curricular units 1st sem (credited),Curricular units 2nd sem (credited),0.9470934915899273
|
| 3 |
+
Curricular units 1st sem (enrolled),Curricular units 2nd sem (enrolled),0.9412864966294326
|
| 4 |
+
Curricular units 1st sem (approved),Curricular units 2nd sem (approved),0.9163339784914017
|
| 5 |
+
Mother's occupation,Father's occupation,0.8865682817307416
|
| 6 |
+
Curricular units 1st sem (grade),Curricular units 2nd sem (grade),0.8458637025340845
|
| 7 |
+
Nacionality,International,0.7973873767851265
|
| 8 |
+
Curricular units 1st sem (evaluations),Curricular units 2nd sem (evaluations),0.7906158307754103
|
| 9 |
+
Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),0.7868376275910449
|
| 10 |
+
Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),0.7828630989223708
|
| 11 |
+
Curricular units 1st sem (enrolled),Curricular units 1st sem (approved),0.7735791213004372
|
| 12 |
+
Curricular units 1st sem (enrolled),Curricular units 2nd sem (credited),0.7632761218093532
|
| 13 |
+
Curricular units 1st sem (approved),Curricular units 2nd sem (enrolled),0.7373747998128278
|
| 14 |
+
Curricular units 1st sem (approved),Curricular units 1st sem (grade),0.7101565018864167
|
| 15 |
+
Curricular units 1st sem (approved),Curricular units 2nd sem (grade),0.7093678199762506
|
| 16 |
+
Curricular units 2nd sem (enrolled),Curricular units 2nd sem (approved),0.7044445310875675
|
dropout_binaryclass/target_correlations.csv
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,correlation
|
| 2 |
+
Curricular units 2nd sem (approved),0.6539952460991423
|
| 3 |
+
Curricular units 2nd sem (grade),0.6053501259229878
|
| 4 |
+
Curricular units 1st sem (approved),0.554880856533347
|
| 5 |
+
Curricular units 1st sem (grade),0.5199270935327744
|
| 6 |
+
Tuition fees up to date,0.4421375757680648
|
| 7 |
+
Scholarship holder,0.313017662589069
|
| 8 |
+
Age at enrollment,-0.2672293831633241
|
| 9 |
+
Debtor,-0.26720719892947853
|
| 10 |
+
Gender,-0.2519548119534265
|
| 11 |
+
Application mode,-0.24450719808426288
|
| 12 |
+
Curricular units 2nd sem (enrolled),0.18289654087432544
|
| 13 |
+
Curricular units 1st sem (enrolled),0.1610735163889365
|
| 14 |
+
Admission grade,0.128057716154513
|
| 15 |
+
Displaced,0.12611303526795542
|
| 16 |
+
Curricular units 2nd sem (evaluations),0.11923876678096997
|
| 17 |
+
Previous qualification (grade),0.10946365310011318
|
| 18 |
+
Curricular units 2nd sem (without evaluations),-0.1026868285766343
|
| 19 |
+
Marital status,-0.10047906625607986
|
| 20 |
+
Application order,0.09435462724757428
|
| 21 |
+
Daytime/evening attendance ,0.08449593574263146
|
| 22 |
+
Curricular units 1st sem (without evaluations),-0.07464226018538014
|
| 23 |
+
Previous qualification,-0.06232290259631596
|
| 24 |
+
Curricular units 1st sem (evaluations),0.05978625949022733
|
| 25 |
+
Mother's qualification,-0.053988794962507865
|
| 26 |
+
Curricular units 2nd sem (credited),0.052401971159116184
|
| 27 |
+
GDP,0.05026014681835994
|
| 28 |
+
Curricular units 1st sem (credited),0.04690001650294807
|
| 29 |
+
Course,0.038135402995266764
|
| 30 |
+
Inflation rate,-0.030325865974636136
|
| 31 |
+
Nacionality,-0.015516308396310501
|
| 32 |
+
Educational special needs,-0.007253654142177353
|
| 33 |
+
International,0.006181262165854279
|
| 34 |
+
Father's qualification,-0.005865479932260606
|
| 35 |
+
Father's occupation,0.005065525427310094
|
| 36 |
+
Unemployment rate,0.0041981052265261075
|
| 37 |
+
Mother's occupation,0.0007724443649592459
|
dropout_binaryclass/train.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dropout_binaryclass/train.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding: utf-8
|
| 3 |
+
"""
|
| 4 |
+
Student Dropout Prediction Model
|
| 5 |
+
Trains a Logistic Regression model and saves it with feature configuration.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import numpy as np
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
import seaborn as sns
|
| 12 |
+
import json
|
| 13 |
+
import joblib
|
| 14 |
+
|
| 15 |
+
from sklearn.model_selection import StratifiedKFold
|
| 16 |
+
from sklearn.linear_model import LogisticRegression
|
| 17 |
+
from sklearn.pipeline import Pipeline
|
| 18 |
+
from sklearn.preprocessing import StandardScaler
|
| 19 |
+
from sklearn.metrics import roc_auc_score, classification_report, accuracy_score
|
| 20 |
+
|
| 21 |
+
# =============================================================================
|
| 22 |
+
# 1. LOAD AND PREPROCESS DATA
|
| 23 |
+
# =============================================================================
|
| 24 |
+
|
| 25 |
+
# Read the raw dataset (semicolon-delimited).
df = pd.read_csv('data.csv', sep=';')
print(f"Original dataset shape: {df.shape}")

# Binary task: keep Dropout/Graduate rows, discard students still 'Enrolled'.
df = df.loc[df['Target'].ne('Enrolled')]
print(f"After filtering 'Enrolled': {df.shape}")

# Round every numeric column to whole numbers ...
df = df.round()

# ... so that the grade and macro-economic columns can be stored as int64.
numeric_cols = [
    'Admission grade',
    'Previous qualification (grade)',
    'Curricular units 1st sem (grade)',
    'Curricular units 2nd sem (grade)',
    'Unemployment rate',
    'Inflation rate',
    'GDP',
]
df = df.astype(dict.fromkeys(numeric_cols, np.int64))

# Columns left out of the feature set (the classmate's feature selection).
columns_to_drop = [
    "Father's occupation",
    "Curricular units 2nd sem (credited)",
    "Curricular units 2nd sem (enrolled)",
    "Curricular units 2nd sem (approved)",
]
df = df.drop(columns=columns_to_drop)

# Encode the label: Dropout -> 0, Graduate -> 1.
df['Target'] = df['Target'].map({'Dropout': 0, 'Graduate': 1})

# Show the class counts as a sanity check on the encoding.
print("\nTarget distribution:")
print(df['Target'].value_counts())

# Split into the feature matrix and the integer label vector.
y = df['Target'].astype(int)
x = df.drop(columns='Target')

print(f"\nFeatures shape: {x.shape}")
print(f"Target shape: {y.shape}")
|
| 70 |
+
|
| 71 |
+
# =============================================================================
|
| 72 |
+
# 2. DEFINE MODEL
|
| 73 |
+
# =============================================================================
|
| 74 |
+
|
| 75 |
+
# Standardise the features, then fit an L2-penalised logistic regression
# with balanced class weights and a fixed random seed.
model = Pipeline(steps=[
    ('scaler', StandardScaler()),
    (
        'clf',
        LogisticRegression(
            penalty='l2',
            C=1.0,
            solver='lbfgs',
            class_weight='balanced',
            random_state=42,
            max_iter=1000,
        ),
    ),
])
|
| 86 |
+
|
| 87 |
+
# =============================================================================
|
| 88 |
+
# 3. CROSS-VALIDATION
|
| 89 |
+
# =============================================================================
|
| 90 |
+
|
| 91 |
+
print(f"\n{'=' * 60}")
print("CROSS-VALIDATION RESULTS")
print("=" * 60)

# 5-fold stratified CV with a fixed seed; per-fold metrics feed the summary below.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
auc_roc_scores, acc_scores = [], []

for fold_no, (fit_rows, holdout_rows) in enumerate(skf.split(x, y), start=1):
    # Refit the whole pipeline (scaler included) on this fold's training rows.
    model.fit(x.iloc[fit_rows], y.iloc[fit_rows])

    holdout_x, holdout_y = x.iloc[holdout_rows], y.iloc[holdout_rows]
    fold_acc = accuracy_score(holdout_y, model.predict(holdout_x))
    # Column 1 of predict_proba holds the probability of label 1.
    fold_auc = roc_auc_score(holdout_y, model.predict_proba(holdout_x)[:, 1])

    auc_roc_scores.append(fold_auc)
    acc_scores.append(fold_acc)

    print(f"\nFold {fold_no}:")
    print(f" Accuracy: {fold_acc:.4f}, ROC-AUC: {fold_auc:.4f}")

print(f"\n{'-' * 60}")
print(f"Average ROC-AUC: {np.mean(auc_roc_scores):.4f} ± {np.std(auc_roc_scores):.4f}")
print(f"Average Accuracy: {np.mean(acc_scores):.4f} ± {np.std(acc_scores):.4f}")
|
| 120 |
+
|
| 121 |
+
# =============================================================================
|
| 122 |
+
# 4. TRAIN FINAL MODEL ON ALL DATA
|
| 123 |
+
# =============================================================================
|
| 124 |
+
|
| 125 |
+
print(f"\n{'=' * 60}")
print("TRAINING FINAL MODEL ON ALL DATA")
print("=" * 60)

# Refit the same pipeline on every row. fit() returns the pipeline itself,
# so final_model and model are one and the same object.
model.fit(x, y)
final_model = model
print("Final model trained successfully!")
|
| 131 |
+
|
| 132 |
+
# =============================================================================
|
| 133 |
+
# 5. FEATURE IMPORTANCE
|
| 134 |
+
# =============================================================================
|
| 135 |
+
|
| 136 |
+
# Pair each input column with its coefficient from the fitted 'clf' step.
classifier = final_model.named_steps['clf']
feature_importance = pd.DataFrame(
    {'feature': x.columns, 'coefficient': classifier.coef_[0]}
)
# Rank by absolute coefficient, largest magnitude first.
feature_importance = feature_importance.sort_values(
    by='coefficient', key=abs, ascending=False
)

top_features = feature_importance.head(10)
print("\nTop 10 Most Important Features:")
print(top_features.to_string(index=False))

# Bar chart of the same ten rows, written to disk and then displayed.
plt.figure(figsize=(10, 6))
sns.barplot(data=top_features, x='coefficient', y='feature')
plt.title('Top 10 Feature Importance (Logistic Regression Coefficients)')
plt.tight_layout()
plt.savefig('feature_importance.png', dpi=150)
plt.show()
|
| 152 |
+
|
| 153 |
+
# =============================================================================
|
| 154 |
+
# 6. SAVE MODEL AND CONFIGURATION
|
| 155 |
+
# =============================================================================
|
| 156 |
+
|
| 157 |
+
print(f"\n{'=' * 60}")
print("SAVING MODEL AND CONFIGURATION")
print("=" * 60)

# Persist the fitted pipeline with joblib.
model_path = "student_dropout_model.pkl"
joblib.dump(final_model, model_path)
print(f"Model saved to: {model_path}")

# Describe the model, its inputs and its cross-validation scores in one
# JSON-serialisable dictionary.
config = {
    "model_name": "Student Dropout Prediction Model",
    "model_type": "LogisticRegression with StandardScaler",
    "target_mapping": {
        "0": "Dropout",
        "1": "Graduate",
    },
    "features": list(x.columns),
    "num_features": x.shape[1],
    "dropped_columns": columns_to_drop,
    # Per-feature dtype, range, mean and a sample value taken from the first row.
    "feature_details": {
        name: {
            "dtype": str(values.dtype),
            "min": float(values.min()),
            "max": float(values.max()),
            "mean": float(values.mean()),
            "example_value": (
                int(values.iloc[0])
                if values.dtype in ['int64', 'int32']
                else float(values.iloc[0])
            ),
        }
        for name, values in x.items()
    },
    "model_performance": {
        "avg_roc_auc": round(np.mean(auc_roc_scores), 4),
        "std_roc_auc": round(np.std(auc_roc_scores), 4),
        "avg_accuracy": round(np.mean(acc_scores), 4),
        "std_accuracy": round(np.std(acc_scores), 4),
    },
    "feature_importance": feature_importance.to_dict('records'),
}

# Write the configuration next to the model file.
config_path = "model_config.json"
with open(config_path, 'w') as config_file:
    config_file.write(json.dumps(config, indent=2))
print(f"Configuration saved to: {config_path}")
|
| 202 |
+
|
| 203 |
+
# =============================================================================
|
| 204 |
+
# 7. PRINT SUMMARY
|
| 205 |
+
# =============================================================================
|
| 206 |
+
|
| 207 |
+
print(f"\n{'=' * 60}")
print("SUMMARY: FEATURES YOUR CLASSMATE SELECTED")
print("=" * 60)

# Numbered list of the columns the model was trained on.
print(f"\nTotal features: {x.shape[1]}")
print("\nFeature list:")
for position, feature_name in enumerate(x.columns, start=1):
    print(f" {position:2d}. {feature_name}")

# Columns that were removed before training.
print("\nDropped columns:")
for dropped_name in columns_to_drop:
    print(f" - {dropped_name}")

# Recap of the artefacts written by this script.
print(f"\n{'=' * 60}")
print("DONE! Files created:")
print(f" 1. {model_path} (trained model)")
print(f" 2. {config_path} (feature configuration)")
print(" 3. feature_importance.png (visualization)")
print("=" * 60)
|
grade_multiclass/02_grade_distribution.png
ADDED
|
grade_multiclass/03_performance_index_distribution.png
ADDED
|
grade_multiclass/04_features_by_grade.png
ADDED
|
grade_multiclass/05_extracurricular_analysis.png
ADDED
|
grade_multiclass/06_correlation_heatmap.png
ADDED
|
grade_multiclass/09_feature_importance.png
ADDED
|
grade_multiclass/10_learning_curves.png
ADDED
|
grade_multiclass/11_model_comparison.png
ADDED
|
grade_multiclass/Student_Performance.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
grade_multiclass/correlation_heatmap.png
ADDED
|
grade_multiclass/feature_importance.png
ADDED
|
grade_multiclass/features_by_grade.png
ADDED
|
grade_multiclass/learning_curves.png
ADDED
|
grade_multiclass/model_comparison.png
ADDED
|
grade_multiclass/student_performance_classification.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
grade_multiclass/student_performance_classification.py
ADDED
|
@@ -0,0 +1,1100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding: utf-8
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
Student Performance Multi-Class Classification
|
| 6 |
+
==============================================
|
| 7 |
+
Predicting student grades from study habits, historical performance,
|
| 8 |
+
and lifestyle factors.
|
| 9 |
+
|
| 10 |
+
Dataset: 10,000 student records with 5 features
|
| 11 |
+
Target: Performance Index → Converted to letter grades (A/B/C/D/F)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
# =============================================================================
|
| 15 |
+
# 1. IMPORTS AND CONFIGURATION
|
| 16 |
+
# =============================================================================
|
| 17 |
+
|
| 18 |
+
import pandas as pd
|
| 19 |
+
import numpy as np
|
| 20 |
+
import matplotlib.pyplot as plt
|
| 21 |
+
import seaborn as sns
|
| 22 |
+
import joblib
|
| 23 |
+
import warnings
|
| 24 |
+
from pathlib import Path
|
| 25 |
+
|
| 26 |
+
from sklearn.model_selection import (
|
| 27 |
+
train_test_split,
|
| 28 |
+
cross_val_score,
|
| 29 |
+
StratifiedKFold,
|
| 30 |
+
GridSearchCV,
|
| 31 |
+
learning_curve
|
| 32 |
+
)
|
| 33 |
+
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
|
| 34 |
+
from sklearn.compose import ColumnTransformer
|
| 35 |
+
from sklearn.pipeline import Pipeline
|
| 36 |
+
from sklearn.linear_model import LogisticRegression
|
| 37 |
+
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
|
| 38 |
+
from sklearn.metrics import (
|
| 39 |
+
classification_report,
|
| 40 |
+
confusion_matrix,
|
| 41 |
+
ConfusionMatrixDisplay,
|
| 42 |
+
accuracy_score,
|
| 43 |
+
f1_score
|
| 44 |
+
)
|
| 45 |
+
from sklearn.utils.class_weight import compute_class_weight
|
| 46 |
+
|
| 47 |
+
# Global settings: ignore all warnings, set plot defaults, define shared constants.
warnings.simplefilter('ignore')
sns.set_theme(style="whitegrid", palette="muted")
plt.rc('figure', figsize=(10, 6))
RANDOM_STATE, CV_FOLDS = 42, 5

# Start-up banner.
print("\n".join([
    "=" * 60,
    " STUDENT PERFORMANCE CLASSIFICATION",
    " Multi-Class Grade Prediction from Academic Factors",
    "=" * 60,
]))
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# =============================================================================
|
| 61 |
+
# 2. DATA LOADING AND INITIAL INSPECTION
|
| 62 |
+
# =============================================================================
|
| 63 |
+
|
| 64 |
+
def load_and_inspect_data(filepath: str) -> pd.DataFrame:
    """Read the CSV at *filepath*, print an overview of it, and return the frame.

    The overview covers shape, column names, dtypes, missing-value counts,
    summary statistics and the value counts of the
    'Extracurricular Activities' column (which must therefore exist).
    """
    data = pd.read_csv(filepath)
    n_rows, n_cols = data.shape

    print("\n📊 DATASET OVERVIEW")
    print("-" * 40)
    print(f"Shape: {n_rows:,} rows × {n_cols} columns")
    print(f"\nColumns: {data.columns.tolist()}")
    print(f"\nData Types:\n{data.dtypes}")
    print(f"\nMissing Values:\n{data.isnull().sum()}")
    print(f"\nBasic Statistics:\n{data.describe()}")

    # The only categorical column gets its own frequency table.
    activity_counts = data['Extracurricular Activities'].value_counts()
    print("\nExtracurricular Activities Distribution:")
    print(activity_counts)

    return data
|
| 82 |
+
|
| 83 |
+
# Read the CSV (this prints the overview) and preview the first ten records.
df = load_and_inspect_data('Student_Performance.csv')
print("\nFirst 10 rows:", df.head(10), sep="\n")
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# =============================================================================
|
| 90 |
+
# 3. TARGET VARIABLE CREATION
|
| 91 |
+
# =============================================================================
|
| 92 |
+
|
| 93 |
+
def create_grade_labels(performance_index: pd.Series) -> pd.Series:
    """
    Bucket a continuous Performance Index into letter grades.

    Each grade covers a left-closed interval starting at its floor:
        F: [0, 60)   D: [60, 70)   C: [70, 80)   B: [80, 90)   A: [90, 101)

    Values outside [0, 101) fall in no interval and come back as NaN.
    The result is an ordered categorical with categories F < D < C < B < A.
    """
    # (letter, lowest score that earns it), from worst to best.
    grade_floors = (('F', 0), ('D', 60), ('C', 70), ('B', 80), ('A', 90))

    letters = [letter for letter, _ in grade_floors]
    # The closing edge is 101 so that a score of exactly 100 still lands in 'A'.
    edges = [floor for _, floor in grade_floors] + [101]

    return pd.cut(
        performance_index,
        bins=edges,
        labels=letters,
        right=False,
        include_lowest=True,
    )
|
| 116 |
+
|
| 117 |
+
# Derive the letter-grade target from the continuous Performance Index.
df['grade'] = create_grade_labels(df['Performance Index'])

print("\n🎯 TARGET VARIABLE CREATED")
print("-" * 40)
print("Grade Distribution:")
grade_counts = df['grade'].value_counts().sort_index()
total_students = len(df)
for letter in ('A', 'B', 'C', 'D', 'F'):
    n_students = grade_counts.get(letter, 0)
    share = n_students / total_students * 100
    # One block character per two percentage points.
    print(f" {letter}: {n_students:>5} ({share:>5.2f}%) {'█' * int(share / 2)}")

# Ratio of the largest class to the smallest; above 10 counts as imbalanced.
imbalance_ratio = grade_counts.max() / grade_counts.min()
print(f"\nImbalance Ratio: {imbalance_ratio:.2f}")
print(
    "⚠️ Significant imbalance - will use class weights"
    if imbalance_ratio > 10
    else "✅ Classes are reasonably balanced"
)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# =============================================================================
|
| 140 |
+
# 4. EXPLORATORY DATA ANALYSIS
|
| 141 |
+
# =============================================================================
|
| 142 |
+
|
| 143 |
+
def _save_and_show(filename: str) -> None:
    """Save the current figure to *filename* (150 dpi, tight bbox) and show it."""
    plt.savefig(filename, dpi=150, bbox_inches='tight')
    plt.show()


def _plot_feature_distributions(df: pd.DataFrame, numerical_features: list) -> None:
    """Draw a histogram with KDE, mean and median markers for each numerical feature."""
    _, axes = plt.subplots(2, 2, figsize=(14, 10))

    for ax, col in zip(axes.flatten(), numerical_features):
        values = df[col]
        mean, median = values.mean(), values.median()
        sns.histplot(values, kde=True, ax=ax, color='teal', bins=30)
        ax.axvline(mean, color='red', linestyle='--', label=f'Mean: {mean:.1f}')
        ax.axvline(median, color='orange', linestyle='--', label=f'Median: {median:.1f}')
        ax.set_title(f'Distribution of {col}')
        ax.legend()

    plt.tight_layout()
    _save_and_show('01_feature_distributions.png')


def _plot_grade_distribution(df: pd.DataFrame, grade_order: list, colors) -> None:
    """Show the grade counts as an annotated bar chart and as a pie chart."""
    _, (bar_ax, pie_ax) = plt.subplots(1, 2, figsize=(14, 5))
    grade_counts = df['grade'].value_counts().reindex(grade_order)

    bars = bar_ax.bar(grade_order, grade_counts.values, color=colors)
    bar_ax.set_title('Grade Distribution', fontsize=14)
    bar_ax.set_xlabel('Grade')
    bar_ax.set_ylabel('Count')
    # Write each count just above its bar.
    for bar, count in zip(bars, grade_counts.values):
        bar_ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 20,
                    f'{count}', ha='center', fontsize=11)

    pie_ax.pie(grade_counts, labels=grade_order, autopct='%1.1f%%',
               colors=colors, explode=[0.02] * len(grade_order))
    pie_ax.set_title('Grade Distribution (%)', fontsize=14)

    plt.tight_layout()
    _save_and_show('02_grade_distribution.png')


def _plot_performance_index(df: pd.DataFrame) -> None:
    """Plot the raw Performance Index histogram with the grade cut-offs marked."""
    plt.figure(figsize=(12, 5))
    sns.histplot(df['Performance Index'], kde=True, bins=50, color='steelblue')

    # Dashed line and label at each grade boundary.
    boundaries = [60, 70, 80, 90]
    boundary_labels = ['F/D', 'D/C', 'C/B', 'B/A']
    for bound, label in zip(boundaries, boundary_labels):
        plt.axvline(bound, color='red', linestyle='--', alpha=0.7)
        plt.text(bound + 1, plt.gca().get_ylim()[1] * 0.9, label, fontsize=10)

    plt.title('Performance Index Distribution with Grade Boundaries')
    plt.xlabel('Performance Index')
    _save_and_show('03_performance_index_distribution.png')


def _plot_features_by_grade(df: pd.DataFrame, numerical_features: list,
                            grade_order: list) -> None:
    """Draw one box plot per numerical feature, grouped by grade."""
    _, axes = plt.subplots(2, 2, figsize=(14, 10))

    for ax, col in zip(axes.flatten(), numerical_features):
        sns.boxplot(data=df, x='grade', y=col, order=grade_order,
                    hue='grade', palette='RdYlGn_r', legend=False, ax=ax)
        ax.set_title(f'{col} by Grade')

    plt.tight_layout()
    _save_and_show('04_features_by_grade.png')


def _plot_extracurricular_analysis(df: pd.DataFrame, grade_order: list, colors) -> None:
    """Compare grade shares and Performance Index across extracurricular groups."""
    _, (share_ax, box_ax) = plt.subplots(1, 2, figsize=(14, 5))

    # Row-normalised crosstab: percentage of each grade within each group.
    ct = pd.crosstab(df['Extracurricular Activities'], df['grade'], normalize='index') * 100
    ct = ct[grade_order]
    # rot=0 keeps the tick labels horizontal. The labels come from the crosstab
    # index, so they always match the categories actually present in the data.
    ct.plot(kind='bar', ax=share_ax, color=colors, edgecolor='black', rot=0)
    share_ax.set_title('Grade Distribution by Extracurricular Activities')
    share_ax.set_ylabel('Percentage')
    share_ax.legend(title='Grade', bbox_to_anchor=(1.02, 1))

    sns.boxplot(data=df, x='Extracurricular Activities', y='Performance Index',
                hue='Extracurricular Activities', palette='Set2', legend=False, ax=box_ax)
    box_ax.set_title('Performance Index by Extracurricular Activities')

    plt.tight_layout()
    _save_and_show('05_extracurricular_analysis.png')


def _plot_correlation_heatmap(df: pd.DataFrame, numerical_features: list) -> pd.DataFrame:
    """Plot the lower-triangle correlation heatmap and return the full matrix."""
    plt.figure(figsize=(10, 8))

    # Only the needed columns are copied; the activity flag is encoded Yes -> 1, else 0.
    corr_input = df[numerical_features].copy()
    corr_input['Extracurricular (encoded)'] = (
        df['Extracurricular Activities'] == 'Yes'
    ).astype(int)
    corr_input['Performance Index'] = df['Performance Index']
    corr_matrix = corr_input.corr()

    # Hide the diagonal and the upper triangle.
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
    sns.heatmap(corr_matrix, annot=True, cmap='RdBu_r', center=0,
                mask=mask, square=True, linewidths=0.5, fmt='.2f')
    plt.title('Feature Correlation Heatmap')
    plt.tight_layout()
    _save_and_show('06_correlation_heatmap.png')

    return corr_matrix


def _plot_pairplot(df: pd.DataFrame, grade_order: list) -> None:
    """Draw a grade-coloured pairplot of the key features on at most 2000 sampled rows."""
    print("\nGenerating pairplot (this may take a moment)...")
    key_features = ['Hours Studied', 'Previous Scores', 'Performance Index']
    sample_df = df.sample(n=min(2000, len(df)), random_state=RANDOM_STATE)

    g = sns.pairplot(sample_df, vars=key_features, hue='grade',
                     hue_order=grade_order, palette='RdYlGn_r',
                     diag_kind='kde', plot_kws={'alpha': 0.6})
    g.fig.suptitle('Feature Relationships by Grade', y=1.02)
    _save_and_show('07_pairplot.png')


def _trend_indicator(corr: float) -> str:
    """Map a correlation coefficient to the arrow used in the printed report."""
    if corr > 0.5:
        return "↑↑"
    if corr > 0.3:
        return "↑"
    if corr > -0.3:
        return "→"
    return "↓"


def _print_correlation_insights(corr_matrix: pd.DataFrame) -> None:
    """Print each feature's correlation with Performance Index, strongest first."""
    print("\n📊 CORRELATION INSIGHTS")
    print("-" * 40)
    perf_corr = (
        corr_matrix['Performance Index']
        .drop('Performance Index')
        .sort_values(ascending=False)
    )
    print("Correlation with Performance Index:")
    for feat, corr in perf_corr.items():
        print(f" {_trend_indicator(corr)} {feat}: {corr:.3f}")


def perform_eda(df: pd.DataFrame):
    """Run the exploratory analysis: seven saved figures plus a printed report.

    Reads the four numerical feature columns, 'Extracurricular Activities',
    'Performance Index' and 'grade' from *df*. The input frame is not
    modified. Figures are written to the working directory as
    '01_feature_distributions.png' ... '07_pairplot.png' and shown with
    ``plt.show()``.
    """
    print("\n📈 EXPLORATORY DATA ANALYSIS")
    print("=" * 60)

    numerical_features = [
        'Hours Studied',
        'Previous Scores',
        'Sleep Hours',
        'Sample Question Papers Practiced',
    ]
    # Display order and colours shared by every grade-based chart.
    grade_order = ['A', 'B', 'C', 'D', 'F']
    colors = sns.color_palette('RdYlGn_r', 5)

    _plot_feature_distributions(df, numerical_features)           # 4.1
    _plot_grade_distribution(df, grade_order, colors)             # 4.2
    _plot_performance_index(df)                                   # 4.3
    _plot_features_by_grade(df, numerical_features, grade_order)  # 4.4
    _plot_extracurricular_analysis(df, grade_order, colors)       # 4.5
    corr_matrix = _plot_correlation_heatmap(df, numerical_features)  # 4.6
    _plot_pairplot(df, grade_order)                               # 4.7
    _print_correlation_insights(corr_matrix)                      # 4.8
|
| 289 |
+
|
| 290 |
+
# Run the exploratory analysis on the loaded data (saves and shows each figure).
perform_eda(df)
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
# =============================================================================
|
| 294 |
+
# 5. DATA PREPROCESSING
|
| 295 |
+
# =============================================================================
|
| 296 |
+
|
| 297 |
+
class StudentDataPreprocessor:
    """Handles all data preprocessing steps.

    Numerical columns are standardised with ``StandardScaler``, the
    categorical column is one-hot encoded (first level dropped) and the
    ``grade`` target is label-encoded.  Balanced class weights are computed
    from the encoded target while fitting and kept on the instance.
    """

    def __init__(self):
        self.numerical_features = [
            'Hours Studied',
            'Previous Scores',
            'Sleep Hours',
            'Sample Question Papers Practiced'
        ]
        self.categorical_features = ['Extracurricular Activities']
        self.all_features = self.numerical_features + self.categorical_features

        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.onehot_encoder = OneHotEncoder(drop='first', sparse_output=False)

        # Populated by fit_transform(); declared here so the attributes exist
        # (as None) on an unfitted instance instead of raising AttributeError.
        self.grade_mapping = None
        self.class_weights = None
        self.feature_names = None
        self.is_fitted = False

    def _require_fitted(self):
        """Raise ValueError when the preprocessor has not been fitted yet."""
        if not self.is_fitted:
            raise ValueError("Preprocessor must be fitted before transforming.")

    def fit_transform(self, df: pd.DataFrame):
        """Fit preprocessors and transform data.

        Parameters
        ----------
        df : DataFrame containing the feature columns listed in
            ``numerical_features`` / ``categorical_features`` and a ``grade``
            column.

        Returns
        -------
        tuple : ``(X_combined, y_encoded)`` - scaled numerical features stacked
            with the one-hot columns, and the label-encoded target.
        """

        # Extract features
        X_numerical = df[self.numerical_features].copy()
        X_categorical = df[self.categorical_features].copy()
        y = df['grade'].copy()

        # Encode target; the mapping is stored with plain Python ints so it
        # prints cleanly and can be serialised without NumPy scalar types.
        y_encoded = self.label_encoder.fit_transform(y)
        self.grade_mapping = {
            grade: int(code)
            for grade, code in zip(
                self.label_encoder.classes_,
                self.label_encoder.transform(self.label_encoder.classes_)
            )
        }

        # Compute class weights
        classes = np.unique(y_encoded)
        weights = compute_class_weight('balanced', classes=classes, y=y_encoded)
        self.class_weights = dict(zip(classes, weights))

        # Scale numerical features
        X_numerical_scaled = self.scaler.fit_transform(X_numerical)

        # Encode categorical features
        X_categorical_encoded = self.onehot_encoder.fit_transform(X_categorical)

        # Combine features
        X_combined = np.hstack([X_numerical_scaled, X_categorical_encoded])

        # Get feature names for later
        cat_feature_names = self.onehot_encoder.get_feature_names_out(self.categorical_features)
        self.feature_names = self.numerical_features + list(cat_feature_names)

        self.is_fitted = True

        print("\n🔧 PREPROCESSING COMPLETE")
        print("-" * 40)
        print(f"Numerical features: {self.numerical_features}")
        print(f"Categorical features: {self.categorical_features}")
        print(f"Total features after encoding: {len(self.feature_names)}")
        print(f"\nFeature names: {self.feature_names}")
        print(f"\nTarget Mapping: {self.grade_mapping}")
        print(f"\nClass Weights:")
        for cls, weight in self.class_weights.items():
            grade = self.get_grade_from_encoding(cls)
            print(f" {grade}: {weight:.4f}")

        return X_combined, y_encoded

    def transform(self, df: pd.DataFrame):
        """Transform new data using fitted preprocessors.

        Raises
        ------
        ValueError : if called before ``fit_transform``.
        """
        self._require_fitted()

        X_numerical = df[self.numerical_features].copy()
        X_categorical = df[self.categorical_features].copy()

        X_numerical_scaled = self.scaler.transform(X_numerical)
        X_categorical_encoded = self.onehot_encoder.transform(X_categorical)

        return np.hstack([X_numerical_scaled, X_categorical_encoded])

    def transform_single(self, hours_studied, previous_scores, sleep_hours,
                         sample_papers, extracurricular):
        """Transform a single sample for prediction.

        The five values are placed in a one-row DataFrame under the training
        column names and passed through ``transform``.

        Raises
        ------
        ValueError : if called before ``fit_transform``.
        """
        self._require_fitted()

        df = pd.DataFrame({
            'Hours Studied': [hours_studied],
            'Previous Scores': [previous_scores],
            'Sleep Hours': [sleep_hours],
            'Sample Question Papers Practiced': [sample_papers],
            'Extracurricular Activities': [extracurricular]
        })

        return self.transform(df)

    def get_grade_from_encoding(self, encoding: int) -> str:
        """Get grade letter from numeric encoding.

        Raises
        ------
        ValueError : if no target mapping exists yet (preprocessor unfitted).
        KeyError : if ``encoding`` is not one of the fitted class codes.
        """
        if self.grade_mapping is None:
            raise ValueError("Preprocessor must be fitted before decoding grades.")

        inv_map = {code: grade for grade, code in self.grade_mapping.items()}
        return inv_map[encoding]

    def save(self, filepath: str):
        """Save preprocessor to disk."""
        joblib.dump(self, filepath)

    @staticmethod
    def load(filepath: str):
        """Load preprocessor from disk.

        NOTE: joblib files are pickle-based; loading one executes whatever the
        file contains, so only load artifacts from a trusted source.
        """
        return joblib.load(filepath)
|
| 409 |
+
|
| 410 |
+
# =============================================================================
# 6. TRAIN/TEST SPLIT
# =============================================================================

# Split the raw rows BEFORE fitting any preprocessing. Fitting the scaler and
# encoders on the full dataset would leak test-set statistics into the
# features the models are later evaluated on.
train_df, test_df = train_test_split(
    df,
    test_size=0.20,
    random_state=RANDOM_STATE,
    stratify=df['grade']
)

# Initialize and fit preprocessor (training rows only)
preprocessor = StudentDataPreprocessor()
X_train, y_train = preprocessor.fit_transform(train_df)

# Apply the already-fitted transformations to the held-out rows
X_test = preprocessor.transform(test_df)
y_test = preprocessor.label_encoder.transform(test_df['grade'])

# Kept so the module still exposes the full feature matrix / target under the
# original names (training rows first, then test rows).
X = np.vstack([X_train, X_test])
y = np.concatenate([y_train, y_test])

print("\n📂 DATA SPLIT")
print("-" * 40)
print(f"Training set: {X_train.shape[0]:,} samples ({X_train.shape[0]/len(y)*100:.1f}%)")
print(f"Testing set: {X_test.shape[0]:,} samples ({X_test.shape[0]/len(y)*100:.1f}%)")
print(f"Features: {X_train.shape[1]}")

print(f"\nTraining set class distribution:")
unique, counts = np.unique(y_train, return_counts=True)
for u, c in zip(unique, counts):
    print(f" {preprocessor.get_grade_from_encoding(u)}: {c:,} ({c/len(y_train)*100:.1f}%)")
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
# =============================================================================
|
| 439 |
+
# 7. MODEL TRAINING WITH CROSS-VALIDATION
|
| 440 |
+
# =============================================================================
|
| 441 |
+
|
| 442 |
+
def cross_validate_model(model, X, y, cv_folds: int = 5, model_name: str = "Model"):
    """Perform cross-validation and return detailed metrics.

    All three metrics are computed in a single ``cross_validate`` pass, so the
    model is fitted once per fold rather than once per fold *per metric*.

    Parameters
    ----------
    model : estimator to evaluate (cloned and refitted for each fold).
    X, y : feature matrix and encoded target.
    cv_folds : number of stratified folds.
    model_name : label used in the printout and in the returned dict.

    Returns
    -------
    dict : ``model_name`` plus ``<metric>_mean`` / ``<metric>_std`` for
        ``accuracy``, ``f1_macro`` and ``f1_weighted``.
    """
    # Local import: this is the only place that needs cross_validate.
    from sklearn.model_selection import cross_validate

    cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=RANDOM_STATE)

    metrics = ['accuracy', 'f1_macro', 'f1_weighted']
    fold_scores = cross_validate(model, X, y, cv=cv, scoring=metrics, n_jobs=-1)

    results = {'model_name': model_name}
    for metric in metrics:
        # cross_validate reports each scorer under the key "test_<name>"
        scores = fold_scores[f'test_{metric}']
        results[f'{metric}_mean'] = scores.mean()
        results[f'{metric}_std'] = scores.std()

    print(f"\n{model_name} - {cv_folds}-Fold Cross-Validation:")
    print(f" Accuracy: {results['accuracy_mean']:.4f} ± {results['accuracy_std']:.4f}")
    print(f" F1 (Macro): {results['f1_macro_mean']:.4f} ± {results['f1_macro_std']:.4f}")
    print(f" F1 (Weight): {results['f1_weighted_mean']:.4f} ± {results['f1_weighted_std']:.4f}")

    return results
|
| 467 |
+
|
| 468 |
+
print("\n🤖 MODEL TRAINING WITH CROSS-VALIDATION")
print("=" * 60)

# Candidate estimators, keyed by the display name used in every report below
models = {}
models['Logistic Regression'] = LogisticRegression(
    solver='lbfgs',
    max_iter=1000,
    random_state=RANDOM_STATE,
    class_weight='balanced',
    n_jobs=-1
)
models['Random Forest'] = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    random_state=RANDOM_STATE,
    class_weight='balanced',
    n_jobs=-1
)
models['Gradient Boosting'] = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=RANDOM_STATE
)

# Cross-validate every candidate on the training split only
cv_results = {
    name: cross_validate_model(model, X_train, y_train, CV_FOLDS, name)
    for name, model in models.items()
}
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
# =============================================================================
|
| 501 |
+
# 8. HYPERPARAMETER TUNING
|
| 502 |
+
# =============================================================================
|
| 503 |
+
|
| 504 |
+
print("\n🔍 HYPERPARAMETER TUNING")
print("=" * 60)

# --- Random Forest: exhaustive grid search, 3 stratified folds, macro-F1 ---
print("\nTuning Random Forest...")
rf_param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[10, 15, 20, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

rf_grid = GridSearchCV(
    estimator=RandomForestClassifier(
        random_state=RANDOM_STATE,
        class_weight='balanced',
        n_jobs=-1,
    ),
    param_grid=rf_param_grid,
    cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE),
    scoring='f1_macro',
    n_jobs=-1,
    verbose=1,
)
rf_grid.fit(X_train, y_train)

print(f"\nRandom Forest Best Parameters: {rf_grid.best_params_}")
print(f"Random Forest Best CV F1 (Macro): {rf_grid.best_score_:.4f}")

# --- Gradient Boosting: same search protocol ---
print("\nTuning Gradient Boosting...")
gb_param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[3, 5, 7],
    learning_rate=[0.05, 0.1, 0.2],
    min_samples_split=[2, 5],
)

gb_grid = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=RANDOM_STATE),
    param_grid=gb_param_grid,
    cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE),
    scoring='f1_macro',
    n_jobs=-1,
    verbose=1,
)
gb_grid.fit(X_train, y_train)

print(f"\nGradient Boosting Best Parameters: {gb_grid.best_params_}")
print(f"Gradient Boosting Best CV F1 (Macro): {gb_grid.best_score_:.4f}")

# Keep the refitted estimator with the highest cross-validated macro-F1.
# Each entry is (estimator, best CV score).
best_models = {
    'Random Forest': (rf_grid.best_estimator_, rf_grid.best_score_),
    'Gradient Boosting': (gb_grid.best_estimator_, gb_grid.best_score_),
}

best_model_name = max(best_models, key=lambda candidate: best_models[candidate][1])
best_model = best_models[best_model_name][0]

print(f"\n🏆 Best Model: {best_model_name}")
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
# =============================================================================
|
| 564 |
+
# 9. FINAL MODEL EVALUATION
|
| 565 |
+
# =============================================================================
|
| 566 |
+
|
| 567 |
+
def comprehensive_evaluation(model, X_test, y_test, preprocessor, model_name: str):
    """Comprehensive model evaluation with visualizations.

    Prints overall metrics and a per-class report, then draws the raw and the
    row-normalised confusion matrix side by side and saves the figure as
    ``08_confusion_matrix_<model name>.png``.

    Parameters
    ----------
    model : fitted classifier exposing ``predict`` and ``predict_proba``.
    X_test, y_test : held-out features and encoded target
        (assumes ``y_test`` was encoded with ``preprocessor.label_encoder``).
    preprocessor : object whose ``label_encoder.classes_`` supplies the class
        names shown in the report and on the matrix axes.
    model_name : label used in titles, printout and the output file name.

    Returns
    -------
    dict : ``accuracy``, ``f1_macro``, ``f1_weighted``, ``y_pred``, ``y_proba``.
    """

    class_names = preprocessor.label_encoder.classes_
    # Pin the label set to every encoded class so the report and the matrices
    # always have one row/column per class name, even when a class happens to
    # be missing from both y_test and y_pred.
    labels = np.arange(len(class_names))

    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    f1_macro = f1_score(y_test, y_pred, average='macro')
    f1_weighted = f1_score(y_test, y_pred, average='weighted')

    print(f"\n{'='*60}")
    print(f"📊 {model_name} - TEST SET EVALUATION")
    print(f"{'='*60}")
    print(f"\nOverall Metrics:")
    print(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f" F1 Score (Macro): {f1_macro:.4f}")
    print(f" F1 Score (Weight): {f1_weighted:.4f}")

    print(f"\nDetailed Classification Report:")
    print(classification_report(
        y_test, y_pred,
        labels=labels,
        target_names=class_names,
        zero_division=0
    ))

    # Confusion Matrices
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    cm = confusion_matrix(y_test, y_pred, labels=labels)
    disp = ConfusionMatrixDisplay(
        confusion_matrix=cm,
        display_labels=class_names
    )
    disp.plot(cmap='Blues', ax=axes[0])
    axes[0].set_title(f'Confusion Matrix - {model_name}')

    # Row-normalise; a class with no true samples keeps an all-zero row
    # instead of producing NaNs from a division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm.astype('float'), row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals > 0
    )
    disp_norm = ConfusionMatrixDisplay(
        confusion_matrix=cm_normalized,
        display_labels=class_names
    )
    disp_norm.plot(cmap='Blues', ax=axes[1], values_format='.2%')
    axes[1].set_title(f'Normalized Confusion Matrix - {model_name}')

    plt.tight_layout()
    plt.savefig(f'08_confusion_matrix_{model_name.lower().replace(" ", "_")}.png',
                dpi=150, bbox_inches='tight')
    plt.show()

    return {
        'accuracy': accuracy,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        'y_pred': y_pred,
        'y_proba': y_proba
    }
|
| 623 |
+
|
| 624 |
+
# Evaluate the selected model on the held-out test split; the returned metrics
# dict is reused by the comparison summary further down.
final_results = comprehensive_evaluation(best_model, X_test, y_test, preprocessor, best_model_name)
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
# =============================================================================
|
| 628 |
+
# 10. FEATURE IMPORTANCE ANALYSIS
|
| 629 |
+
# =============================================================================
|
| 630 |
+
|
| 631 |
+
def plot_feature_importance(model, feature_names: list, model_name: str):
    """Print and plot the model's feature importances, most important first.

    Models without a ``feature_importances_`` attribute are reported and
    skipped.  The bar chart is saved as ``09_feature_importance.png``.
    """

    # Guard clause: nothing to plot for estimators lacking importances
    if not hasattr(model, 'feature_importances_'):
        print("Model doesn't support feature importance extraction.")
        return

    importances = model.feature_importances_
    order = np.argsort(importances)[::-1]  # descending importance
    ranked_values = importances[order]
    ranked_names = [feature_names[pos] for pos in order]

    print(f"\n📊 Feature Importance - {model_name}")
    print("-" * 40)
    for rank, pos in enumerate(order, start=1):
        print(f" {rank}. {feature_names[pos]}: {importances[pos]:.4f} ({importances[pos]*100:.1f}%)")

    plt.figure(figsize=(10, 6))
    palette = sns.color_palette('viridis', len(feature_names))
    positions = range(len(order))
    bars = plt.barh(positions, ranked_values, color=palette)
    plt.yticks(positions, ranked_names)
    plt.xlabel('Feature Importance')
    plt.title(f'Feature Importance - {model_name}')
    plt.gca().invert_yaxis()  # largest bar at the top

    # Numeric label just to the right of each bar
    for bar, value in zip(bars, ranked_values):
        x_pos = bar.get_width() + 0.01
        y_pos = bar.get_y() + bar.get_height()/2
        plt.text(x_pos, y_pos, f'{value:.3f}', va='center', fontsize=10)

    plt.tight_layout()
    plt.savefig('09_feature_importance.png', dpi=150, bbox_inches='tight')
    plt.show()
|
| 662 |
+
|
| 663 |
+
# Report importances for the selected model using the post-encoding feature names.
plot_feature_importance(best_model, preprocessor.feature_names, best_model_name)
|
| 664 |
+
|
| 665 |
+
|
| 666 |
+
# =============================================================================
|
| 667 |
+
# 11. LEARNING CURVES
|
| 668 |
+
# =============================================================================
|
| 669 |
+
|
| 670 |
+
def plot_learning_curves(model, X, y, model_name: str):
    """Plot macro-F1 learning curves and print a bias/variance diagnosis.

    Scores come from ``learning_curve`` over ten training-set fractions
    (10% to 100%) with 5-fold stratified CV.  The figure is saved as
    ``10_learning_curves.png``.
    """

    print(f"\nGenerating learning curves for {model_name}...")

    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    train_sizes, train_scores, val_scores = learning_curve(
        model, X, y,
        train_sizes=np.linspace(0.1, 1.0, 10),
        cv=splitter,
        scoring='f1_macro',
        n_jobs=-1
    )

    # Aggregate across folds (axis 1) for each training-set size
    train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
    val_mean, val_std = val_scores.mean(axis=1), val_scores.std(axis=1)

    series = (
        (train_mean, train_std, 'blue', 'Training Score'),
        (val_mean, val_std, 'orange', 'Validation Score'),
    )

    plt.figure(figsize=(10, 6))
    # Shaded +/- one standard deviation bands first, then the mean lines
    for mean, std, colour, _ in series:
        plt.fill_between(train_sizes, mean - std, mean + std,
                         alpha=0.1, color=colour)
    for mean, _, colour, label in series:
        plt.plot(train_sizes, mean, 'o-', color=colour, label=label)
    plt.xlabel('Training Set Size')
    plt.ylabel('F1 Score (Macro)')
    plt.title(f'Learning Curves - {model_name}')
    plt.legend(loc='lower right')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('10_learning_curves.png', dpi=150, bbox_inches='tight')
    plt.show()

    final_train = train_mean[-1]
    final_val = val_mean[-1]
    final_gap = final_train - final_val
    print(f"\n📈 Learning Curve Analysis:")
    print(f" Final Training Score: {final_train:.4f}")
    print(f" Final Validation Score: {final_val:.4f}")
    print(f" Gap: {final_gap:.4f}")

    # Heuristic diagnosis: large train/validation gap first, then low score
    if final_gap > 0.1:
        verdict = " ⚠️ High variance - model may be overfitting"
    elif final_val < 0.6:
        verdict = " ⚠️ High bias - model may be underfitting"
    else:
        verdict = " ✅ Model appears well-balanced"
    print(verdict)
|
| 716 |
+
|
| 717 |
+
# Build an unfitted estimator with the tuned hyperparameters; learning_curve
# refits it on every training-size / fold combination.
if best_model_name != 'Random Forest':
    model_for_curves = GradientBoostingClassifier(
        random_state=RANDOM_STATE,
        **gb_grid.best_params_
    )
else:
    model_for_curves = RandomForestClassifier(
        random_state=RANDOM_STATE,
        class_weight='balanced',
        n_jobs=-1,
        **rf_grid.best_params_
    )

plot_learning_curves(model_for_curves, X_train, y_train, best_model_name)
|
| 728 |
+
|
| 729 |
+
|
| 730 |
+
# =============================================================================
|
| 731 |
+
# 12. MODEL COMPARISON SUMMARY
|
| 732 |
+
# =============================================================================
|
| 733 |
+
|
| 734 |
+
def create_comparison_summary(cv_results: dict, best_model_name: str, final_accuracy: float):
    """Print a cross-validation comparison table and plot it as grouped bars.

    ``cv_results`` maps model name to the metrics dict produced by
    ``cross_validate_model``.  ``best_model_name`` and ``final_accuracy`` are
    accepted but not used by the body.  The chart is saved as
    ``11_model_comparison.png``.
    """

    rule = "=" * 60
    print("\n" + rule)
    print("📋 MODEL COMPARISON SUMMARY")
    print(rule)

    def _mean_pm_std(metrics, key):
        # "mean ± std" with four decimals, as shown in the table
        return f"{metrics[key + '_mean']:.4f} ± {metrics[key + '_std']:.4f}"

    summary_df = pd.DataFrame([
        {
            'Model': name,
            'CV Accuracy': _mean_pm_std(metrics, 'accuracy'),
            'CV F1 (Macro)': _mean_pm_std(metrics, 'f1_macro'),
            'CV F1 (Weighted)': _mean_pm_std(metrics, 'f1_weighted'),
        }
        for name, metrics in cv_results.items()
    ])
    print(summary_df.to_string(index=False))

    # Visualization: one accuracy bar and one macro-F1 bar per model
    fig, ax = plt.subplots(figsize=(12, 6))

    positions = np.arange(len(cv_results))
    width = 0.35
    half = width/2

    accuracies = []
    f1_scores = []
    for metrics in cv_results.values():
        accuracies.append(metrics['accuracy_mean'])
        f1_scores.append(metrics['f1_macro_mean'])

    bars1 = ax.bar(positions - half, accuracies, width, label='Accuracy', color='steelblue')
    bars2 = ax.bar(positions + half, f1_scores, width, label='F1 (Macro)', color='darkorange')

    ax.set_ylabel('Score')
    ax.set_title('Model Comparison - Cross-Validation Results')
    ax.set_xticks(positions)
    ax.set_xticklabels(cv_results.keys())
    ax.legend()
    ax.set_ylim(0, 1.0)

    # Value label centred just above each bar
    for bar in (*bars1, *bars2):
        height = bar.get_height()
        centre = bar.get_x() + bar.get_width()/2
        ax.annotate(f'{height:.3f}', xy=(centre, height),
                    xytext=(0, 3), textcoords="offset points", ha='center', fontsize=9)

    plt.tight_layout()
    plt.savefig('11_model_comparison.png', dpi=150, bbox_inches='tight')
    plt.show()
|
| 780 |
+
|
| 781 |
+
# Summarise the cross-validation results of all candidate models.
create_comparison_summary(cv_results, best_model_name, final_results['accuracy'])
|
| 782 |
+
|
| 783 |
+
|
| 784 |
+
# =============================================================================
|
| 785 |
+
# 13. AGENT-READY PREDICTION CLASS
|
| 786 |
+
# =============================================================================
|
| 787 |
+
|
| 788 |
+
class StudentGradePredictor:
    """
    Production-ready grade prediction class for agent integration.

    Wraps a fitted classifier together with the fitted preprocessor, validates
    raw inputs against ``valid_ranges`` and returns predictions as plain dicts
    (``success`` flag, grade, confidence, probability distribution and a
    recommendation text).
    """

    def __init__(self, model, preprocessor: "StudentDataPreprocessor"):
        self.model = model
        self.preprocessor = preprocessor
        self.grade_order = ['A', 'B', 'C', 'D', 'F']

        # Inclusive (min, max) bounds for numeric inputs; allowed values for
        # the categorical one.
        self.valid_ranges = {
            'hours_studied': (0, 50),
            'previous_scores': (0, 100),
            'sleep_hours': (0, 24),
            'sample_papers': (0, 20),
            'extracurricular': ['Yes', 'No']
        }

    def validate_input(self, hours_studied, previous_scores, sleep_hours,
                       sample_papers, extracurricular) -> tuple:
        """Validate input values.

        Returns
        -------
        tuple : ``(True, "Valid")`` or ``(False, message)`` where ``message``
            joins every problem found with ``"; "``.  Values that cannot be
            compared with the numeric bounds (``None``, strings, ...) are
            reported as errors rather than raising.
        """

        errors = []

        # Check numerical ranges
        numeric_inputs = [
            ('hours_studied', hours_studied),
            ('previous_scores', previous_scores),
            ('sleep_hours', sleep_hours),
            ('sample_papers', sample_papers),
        ]

        for name, value in numeric_inputs:
            min_val, max_val = self.valid_ranges[name]
            try:
                in_range = bool(min_val <= value <= max_val)
            except (TypeError, ValueError):
                # Not comparable with numbers (None, str, array, ...)
                errors.append(
                    f"{name} must be a number between {min_val} and {max_val} (got {value!r})"
                )
                continue
            if not in_range:
                errors.append(f"{name} must be between {min_val} and {max_val} (got {value})")

        # Check categorical
        if extracurricular not in self.valid_ranges['extracurricular']:
            errors.append(f"extracurricular must be 'Yes' or 'No' (got {extracurricular})")

        if errors:
            return False, "; ".join(errors)
        return True, "Valid"

    def predict(self, hours_studied: float, previous_scores: float,
                sleep_hours: float, sample_papers: int,
                extracurricular: str) -> dict:
        """
        Make a grade prediction with confidence scores.

        Parameters:
        -----------
        hours_studied : float - Total hours spent studying (0-50)
        previous_scores : float - Previous test scores (0-100)
        sleep_hours : float - Average daily sleep hours (0-24)
        sample_papers : int - Number of practice papers completed (0-20)
        extracurricular : str - Participates in extracurricular activities ('Yes'/'No')

        Returns:
        --------
        dict : Prediction results.  On invalid input ``success`` is False,
            ``error`` holds the validation message and the prediction fields
            are None; the model is not called in that case.
        """

        # Validate input
        is_valid, message = self.validate_input(
            hours_studied, previous_scores, sleep_hours, sample_papers, extracurricular
        )
        if not is_valid:
            return {
                'success': False,
                'error': message,
                'predicted_grade': None,
                'confidence': None
            }

        # Transform input
        X = self.preprocessor.transform_single(
            hours_studied, previous_scores, sleep_hours,
            sample_papers, extracurricular
        )

        # Predict
        prediction = self.model.predict(X)[0]
        probabilities = self.model.predict_proba(X)[0]

        # predict_proba columns follow model.classes_, so look the column up
        # there instead of assuming "encoded label == column index".  Models
        # without classes_ fall back to the positional assumption.
        model_classes = list(getattr(self.model, 'classes_', range(len(probabilities))))

        predicted_grade = self.preprocessor.get_grade_from_encoding(prediction)
        confidence = probabilities[model_classes.index(prediction)]

        # Probability distribution (percent, two decimals) keyed by grade
        prob_distribution = {
            self.preprocessor.get_grade_from_encoding(encoded): round(probabilities[column] * 100, 2)
            for column, encoded in enumerate(model_classes)
        }

        # Generate insights
        recommendation = self._generate_recommendation(
            predicted_grade, confidence, hours_studied, previous_scores,
            sleep_hours, sample_papers, extracurricular
        )

        confidence_level = self._get_confidence_level(confidence)

        return {
            'success': True,
            'predicted_grade': predicted_grade,
            'confidence': round(confidence * 100, 2),
            'confidence_level': confidence_level,
            'probability_distribution': prob_distribution,
            'input_summary': {
                'hours_studied': hours_studied,
                'previous_scores': previous_scores,
                'sleep_hours': sleep_hours,
                'sample_papers': sample_papers,
                'extracurricular': extracurricular
            },
            'recommendation': recommendation,
            'disclaimer': (
                "This prediction is based on statistical patterns and should inform, "
                "not replace, professional educator judgment."
            )
        }

    def _get_confidence_level(self, confidence: float) -> str:
        """Bucket a probability into HIGH (>= 0.7), MODERATE (>= 0.4) or LOW."""
        if confidence >= 0.7:
            return "HIGH"
        elif confidence >= 0.4:
            return "MODERATE"
        else:
            return "LOW"

    def _generate_recommendation(self, grade, confidence, hours_studied,
                                 previous_scores, sleep_hours, sample_papers,
                                 extracurricular):
        """Generate actionable recommendations.

        Returns the applicable messages joined by single spaces.  Any grade
        other than 'D', 'F', 'C' or 'B' is treated like an 'A'.
        ``extracurricular`` is accepted but does not influence the text.
        """

        recommendations = []

        if grade in ['D', 'F']:
            recommendations.append("⚠️ Student may need intervention.")

            if hours_studied < 5:
                recommendations.append("📚 Study hours are very low - recommend study plan.")
            if previous_scores < 60:
                recommendations.append("📝 Previous performance concerning - consider tutoring.")
            if sleep_hours < 6:
                recommendations.append("😴 Sleep deprivation may be affecting performance.")
            if sample_papers < 2:
                recommendations.append("📋 More practice tests recommended.")

        elif grade == 'C':
            recommendations.append("📊 Average performance - room for improvement.")
            if hours_studied < 7:
                recommendations.append("📚 Increasing study hours could help.")
            if sample_papers < 3:
                recommendations.append("📋 More practice papers recommended.")

        elif grade == 'B':
            recommendations.append("👍 Good performance.")
            if hours_studied < 8 or sample_papers < 4:
                recommendations.append("📈 Small improvements could push to A grade.")

        else:  # A
            recommendations.append("🌟 Excellent! Student is performing very well.")

        if confidence < 0.4:
            recommendations.append("⚡ Low confidence - consider additional assessment.")

        return " ".join(recommendations)

    def predict_batch(self, df: pd.DataFrame) -> pd.DataFrame:
        """Make predictions for multiple students.

        Returns ``df`` (index reset) with ``predicted_grade``, ``confidence``
        and ``confidence_level`` columns appended.  Rows that fail validation
        get None in those columns; the columns are present even when ``df``
        has no rows.
        """

        output_columns = ['predicted_grade', 'confidence', 'confidence_level']

        results = []
        for _, row in df.iterrows():
            result = self.predict(
                row['Hours Studied'],
                row['Previous Scores'],
                row['Sleep Hours'],
                row['Sample Question Papers Practiced'],
                row['Extracurricular Activities']
            )
            results.append({column: result.get(column) for column in output_columns})

        # Explicit columns keep the output schema stable for an empty batch
        predictions = pd.DataFrame(results, columns=output_columns)
        return pd.concat([df.reset_index(drop=True), predictions], axis=1)

    def save(self, directory: str = 'model_artifacts'):
        """Save all model artifacts.

        Writes ``model.pkl``, ``preprocessor.pkl`` and ``valid_ranges.pkl``
        into ``directory``, creating it (and any missing parents) if needed.
        """
        path = Path(directory)
        path.mkdir(parents=True, exist_ok=True)

        joblib.dump(self.model, path / 'model.pkl')
        joblib.dump(self.preprocessor, path / 'preprocessor.pkl')
        joblib.dump(self.valid_ranges, path / 'valid_ranges.pkl')

        print(f"✅ Model artifacts saved to '{directory}/'")

    @classmethod
    def load(cls, directory: str = 'model_artifacts'):
        """Load model artifacts.

        NOTE: joblib files are pickle-based; loading them executes whatever
        the files contain, so only load artifact directories you trust.
        """
        path = Path(directory)

        model = joblib.load(path / 'model.pkl')
        preprocessor = joblib.load(path / 'preprocessor.pkl')

        predictor = cls(model, preprocessor)
        predictor.valid_ranges = joblib.load(path / 'valid_ranges.pkl')

        print(f"✅ Model loaded from '{directory}/'")
        return predictor
|
| 1000 |
+
|
| 1001 |
+
|
| 1002 |
+
# Bundle the selected model with its preprocessor and persist the artifacts
predictor = StudentGradePredictor(model=best_model, preprocessor=preprocessor)
predictor.save(directory='model_artifacts')
|
| 1005 |
+
|
| 1006 |
+
|
| 1007 |
+
# =============================================================================
|
| 1008 |
+
# 14. INTERACTIVE DEMONSTRATION
|
| 1009 |
+
# =============================================================================
|
| 1010 |
+
|
| 1011 |
+
def display_prediction_report(result: dict):
    """Pretty-print a prediction dict as returned by ``StudentGradePredictor.predict``.

    A failed prediction (``success`` false) prints only its error message.
    """

    if not result['success']:
        print(f"\n❌ PREDICTION FAILED: {result['error']}")
        return

    rule = "=" * 60

    print("\n" + rule)
    print(" 🎓 STUDENT PERFORMANCE PREDICTION REPORT")
    print(rule)

    # (label, key in input_summary, trailing unit text)
    input_rows = (
        ("Hours Studied", 'hours_studied', " h"),
        ("Previous Scores", 'previous_scores', ""),
        ("Sleep Hours", 'sleep_hours', " h/day"),
        ("Practice Papers", 'sample_papers', ""),
        ("Extracurricular", 'extracurricular', ""),
    )
    inp = result['input_summary']
    print(f"\n📋 INPUT PARAMETERS:")
    for label, key, unit in input_rows:
        print(f" • {label}: {inp[key]:>6}{unit}")

    print(f"\n🎯 PREDICTION:")
    print(f" • Predicted Grade: {result['predicted_grade']}")
    print(f" • Confidence: {result['confidence']:.1f}% ({result['confidence_level']})")

    print(f"\n📊 PROBABILITY DISTRIBUTION:")
    distribution = result['probability_distribution']
    for grade in ['A', 'B', 'C', 'D', 'F']:
        prob = distribution.get(grade, 0)
        # One block character per 5 percentage points
        bar = "█" * int(prob / 5)
        print(f" {grade}: {bar:<20} {prob:>5.1f}%")

    print(f"\n💡 RECOMMENDATION:")
    print(f" {result['recommendation']}")

    print(rule)
|
| 1045 |
+
|
| 1046 |
+
print("\n" + "🧪 " * 20)
print(" INTERACTIVE PREDICTION DEMONSTRATIONS")
print("🧪 " * 20)

# Arguments are positional, in the order
# (hours_studied, previous_scores, sleep_hours, sample_papers, extracurricular).

# Test Case 1: High-performing student
result1 = predictor.predict(9, 95, 8, 5, 'Yes')
display_prediction_report(result1)

# Test Case 2: Struggling student
result2 = predictor.predict(2, 45, 5, 0, 'No')
display_prediction_report(result2)

# Test Case 3: Average student
result3 = predictor.predict(5, 70, 7, 2, 'Yes')
display_prediction_report(result3)

# Test Case 4: Edge case - high previous scores but low effort
result4 = predictor.predict(1, 85, 6, 1, 'No')
display_prediction_report(result4)

# Test Case 5: Invalid input (out-of-range numbers and an unknown category)
result5 = predictor.predict(-5, 150, 30, 0, 'Maybe')
display_prediction_report(result5)
|
| 1099 |
+
|
| 1100 |
+
|
grade_multiclass/target_distribution.png
ADDED
|
lr_attendance/2018-2019_Daily_Attendance_20240429.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
lr_attendance/add_weather_features.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import requests
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
# Central Park, NYC: the single location used for every weather lookup.
NYC_LAT = 40.7789
NYC_LON = -73.9692

# Attendance table written by the feature-engineering script.
df = pd.read_csv("attendance_with_features.csv")

# "Date" is parsed as YYYYMMDD into a datetime column.
df["date"] = pd.to_datetime(df["Date"], format="%Y%m%d")

# Distinct calendar days present in the attendance data, oldest first.
calendar_days = df["date"].dt.date
unique_dates = sorted(calendar_days.unique())
print(
    f"Fetching weather data for {len(unique_dates)} unique dates from {unique_dates[0]} to {unique_dates[-1]}"
)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def fetch_weather_data(start_date, end_date, timeout=30):
    """Fetch daily weather for one date range from the Open-Meteo archive API.

    Args:
        start_date: First day of the range, as the date string sent to the API
            (the caller in this script passes "YYYY-MM-DD").
        end_date: Last day of the range, same format as ``start_date``.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot hang the whole script.

    Returns:
        A DataFrame with a "date" column (``datetime.date`` values) followed by
        one column per requested weather variable, or ``None`` if the request
        or the parsing of its response failed (the error is printed).
    """
    url = "https://archive-api.open-meteo.com/v1/archive"

    # Open-Meteo daily variable -> output column name. Insertion order drives
    # both the requested variable list and the column order of the result, so
    # the two can no longer drift apart.
    daily_columns = {
        "temperature_2m_max": "temp_max",
        "temperature_2m_min": "temp_min",
        "temperature_2m_mean": "temp_mean",
        "precipitation_sum": "precipitation_total",
        "rain_sum": "rain_total",
        "snowfall_sum": "snow_total",
        "precipitation_hours": "precipitation_hours",
        "wind_speed_10m_max": "wind_speed_max",
        "wind_gusts_10m_max": "wind_gust_max",
        "weather_code": "weather_code",
        "sunshine_duration": "sunshine_duration",
        "daylight_duration": "daylight_duration",
    }

    params = {
        "latitude": NYC_LAT,
        "longitude": NYC_LON,
        "start_date": start_date,
        "end_date": end_date,
        "daily": list(daily_columns),
        "timezone": "America/New_York",
        "temperature_unit": "celsius",
        "wind_speed_unit": "kmh",
        "precipitation_unit": "mm",
    }

    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        daily = response.json()["daily"]

        # Build the frame column by column, starting with the calendar date.
        columns = {"date": pd.to_datetime(daily["time"]).date}
        for api_name, column_name in daily_columns.items():
            columns[column_name] = daily[api_name]

        return pd.DataFrame(columns)

    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
        # Network/HTTP failures, undecodable JSON, or a payload missing the
        # expected fields: report and let the caller skip this range.
        print(f"Error fetching weather data: {e}")
        return None
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# Request the weather in batches so no single call covers too many dates.
chunk_size = 365  # days per request
weather_data = []

for offset in range(0, len(unique_dates), chunk_size):
    batch = unique_dates[offset : offset + chunk_size]
    start_date = f"{batch[0]:%Y-%m-%d}"
    end_date = f"{batch[-1]:%Y-%m-%d}"

    print(f"Fetching weather for {start_date} to {end_date}...")

    # A failed request comes back as None and is simply left out.
    fetched = fetch_weather_data(start_date, end_date)
    if fetched is not None:
        weather_data.append(fetched)

    # Rate limiting: pause one second between requests.
    time.sleep(1)
|
| 101 |
+
|
| 102 |
+
# Nothing to merge if every request failed.
if not weather_data:
    print("Failed to fetch weather data")
else:
    # Stack the per-request frames into one table with a fresh index.
    weather_df = pd.concat(weather_data, ignore_index=True)
    print(f"Successfully fetched weather data for {len(weather_df)} days")

    # Persist the raw weather table before the join key is added to it.
    weather_df.to_csv("nyc_weather_2018_2019.csv", index=False)
    print("Weather data saved as 'nyc_weather_2018_2019.csv'")

    # Give both tables a plain calendar-date key to join on.
    df["date_key"] = df["date"].dt.date
    weather_df["date_key"] = weather_df["date"]

    # Left join: every attendance row is kept, matched weather or not.
    weather_without_date = weather_df.drop(columns="date")
    attendance_with_weather = pd.merge(
        df, weather_without_date, how="left", on="date_key"
    )

    # Shorthand for the raw measurements the derived features are built from.
    temp_high = attendance_with_weather["temp_max"]
    temp_low = attendance_with_weather["temp_min"]
    precipitation = attendance_with_weather["precipitation_total"]
    snowfall = attendance_with_weather["snow_total"]
    wind = attendance_with_weather["wind_speed_max"]

    # Derived weather features (thresholds in the units requested from the API).
    attendance_with_weather["temp_range"] = temp_high - temp_low
    attendance_with_weather["is_rainy_day"] = (precipitation > 2.0).astype(int)
    attendance_with_weather["is_snowy_day"] = (snowfall > 0.5).astype(int)
    attendance_with_weather["is_windy_day"] = (wind > 20.0).astype(int)
    too_hot = temp_high > 32
    too_cold = temp_low < -5
    attendance_with_weather["is_extreme_temp"] = (too_hot | too_cold).astype(int)

    # Weather severity score (0-1, higher = worse conditions): scaled rain,
    # snow and wind added together, then capped to the 0..1 range.
    severity = precipitation / 50 + snowfall / 20 + wind / 50
    attendance_with_weather["weather_severity"] = severity.clip(0, 1)

    print("\nWeather features added:")
    weather_feature_names = {
        "temp_max",
        "temp_min",
        "temp_mean",
        "temp_range",
        "precipitation_total",
        "rain_total",
        "snow_total",
        "precipitation_hours",
        "wind_speed_max",
        "wind_gust_max",
        "weather_code",
        "sunshine_duration",
        "daylight_duration",
        "is_rainy_day",
        "is_snowy_day",
        "is_windy_day",
        "is_extreme_temp",
        "weather_severity",
    }
    # Listed in the order the columns appear in the merged table.
    weather_features = [
        name
        for name in attendance_with_weather.columns
        if name in weather_feature_names
    ]
    for feature in weather_features:
        print(f"- {feature}")

    # Save final dataset
    attendance_with_weather.to_csv("attendance_features_complete.csv", index=False)
    print(
        f"\nFinal dataset with {len(attendance_with_weather.columns)} total features saved as 'attendance_features_complete.csv'"
    )

    print("\nSample of weather-related features:")
    preview_columns = [
        "Date",
        "attendance_rate",
        "temp_mean",
        "precipitation_total",
        "is_rainy_day",
        "weather_severity",
    ]
    print(attendance_with_weather[preview_columns].head(10))
|
lr_attendance/best_model_coefficients.csv
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature,Coefficient
|
| 2 |
+
school_avg_attendance,7.137911799297406
|
| 3 |
+
temp_mean,1.4284187566515165
|
| 4 |
+
day_of_week,-0.9758038207498889
|
| 5 |
+
precipitation_hours,-0.7556588622303482
|
| 6 |
+
is_holiday,-0.6112820331872162
|
| 7 |
+
is_snowy_day,-0.6059281530329762
|
| 8 |
+
is_monday,-0.5594793735265182
|
| 9 |
+
school_std_attendance,0.5048872431769107
|
| 10 |
+
is_rainy_day,0.1904241431075877
|
| 11 |
+
days_to_next_holiday,0.13888911829632
|
| 12 |
+
school_year_progress,0.09479814368925972
|
| 13 |
+
is_friday,-0.016717455812732072
|
lr_attendance/explore_data.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from datetime import datetime, timedelta
|
| 4 |
+
import holidays
|
| 5 |
+
|
| 6 |
+
# Read the raw daily attendance export.
df = pd.read_csv("2018-2019_Daily_Attendance_20240429.csv")

# Size, date span, school count and column names of the raw table.
print("Dataset Info:")
print(f"Total records: {len(df)}")
print(f"Date range: {df['Date'].min()} to {df['Date'].max()}")
print(f"Unique schools: {df['School DBN'].nunique()}")
print("\nColumns:", list(df.columns))

# Count of missing entries per column.
print("\nMissing values:")
missing_per_column = df.isna().sum()
print(missing_per_column)

# Attendance rate: students present as a percentage of students enrolled.
df["attendance_rate"] = df["Present"].div(df["Enrolled"]).mul(100)

# Summary statistics of the new rate column.
print("\nAttendance Rate Statistics:")
rate_summary = df["attendance_rate"].describe()
print(rate_summary)

# First rows, now including the attendance rate.
print("\nSample data:")
print(df.head())
|
lr_attendance/feature_engineering.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
import holidays
|
| 5 |
+
|
| 6 |
+
# Read the raw daily attendance export.
df = pd.read_csv("2018-2019_Daily_Attendance_20240429.csv")

# Parse the YYYYMMDD "Date" values into a datetime column.
df["date"] = pd.to_datetime(df["Date"], format="%Y%m%d")

# Target variable: students present as a percentage of students enrolled.
present_share = df["Present"] / df["Enrolled"]
df["attendance_rate"] = present_share * 100

# Calendar features, all read off the parsed date.
when = df["date"].dt
df["day_of_week"] = when.weekday  # 0=Monday, 6=Sunday
df["day_of_week_name"] = when.day_name()
df["month"] = when.month
df["month_name"] = when.month_name()
df["quarter"] = when.quarter
df["week_of_year"] = when.isocalendar().week
df["day_of_month"] = when.day
df["day_of_year"] = when.dayofyear
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Season mapping
|
| 27 |
+
def get_season(month):
    """Return the season name for a month number.

    December-February map to "Winter", March-May to "Spring" and
    June-August to "Summer"; every other value yields "Fall".
    """
    seasons = (
        ((12, 1, 2), "Winter"),
        ((3, 4, 5), "Spring"),
        ((6, 7, 8), "Summer"),
    )
    for months, name in seasons:
        if month in months:
            return name
    # Anything not listed above (September-November included) is "Fall".
    return "Fall"
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Season name for every row, looked up from its month number.
df["season"] = df["month"].map(get_season)

# day_of_week runs 0=Monday .. 6=Sunday, so 5 and 6 are the weekend.
weekday_number = df["day_of_week"]

# Weekend indicator
df["is_weekend"] = weekday_number.ge(5).astype(int)

# School day indicator (assuming Mon-Fri are school days)
df["is_school_day"] = weekday_number.lt(5).astype(int)
|
| 45 |
+
|
| 46 |
+
# Days listed as holidays/recess for the 2018-2019 NYC public school year.
nyc_holidays_2018_19 = [
    # --- September to November 2018 ---
    "2018-09-10",  # Rosh Hashanah (Observed)
    "2018-09-11",  # Rosh Hashanah (Observed)
    "2018-09-19",  # Yom Kippur
    "2018-10-08",  # Columbus Day
    "2018-11-06",  # Election Day
    "2018-11-12",  # Veterans Day
    "2018-11-22",  # Thanksgiving Day
    "2018-11-23",  # Thanksgiving Recess
    # --- Winter recess ---
    "2018-12-24",  # Winter Recess
    "2018-12-25",  # Christmas Day
    "2018-12-26",  # Winter Recess
    "2018-12-27",  # Winter Recess
    "2018-12-28",  # Winter Recess
    "2018-12-31",  # Winter Recess
    "2019-01-01",  # New Year's Day
    "2019-01-02",  # Winter Recess
    # --- January and February 2019 ---
    "2019-01-21",  # Dr. Martin Luther King Jr. Day
    "2019-02-18",  # Midwinter Recess
    "2019-02-19",  # Midwinter Recess
    "2019-02-20",  # Midwinter Recess
    "2019-02-21",  # Midwinter Recess
    "2019-02-22",  # Midwinter Recess
    # --- Spring recess ---
    "2019-04-15",  # Spring Recess
    "2019-04-16",  # Spring Recess
    "2019-04-17",  # Spring Recess
    "2019-04-18",  # Spring Recess
    "2019-04-19",  # Spring Recess
    "2019-04-22",  # Spring Recess
    "2019-04-23",  # Spring Recess
    "2019-04-24",  # Spring Recess
    "2019-04-25",  # Spring Recess
    # --- May and June 2019 ---
    "2019-05-27",  # Memorial Day
    "2019-06-06",  # Chancellor's Conference Day
    "2019-06-11",  # Anniversary Day
]

# Parsed form of the list above, used for all date comparisons.
holiday_dates = pd.to_datetime(nyc_holidays_2018_19)

# 1 when the row's date is one of the listed holidays, else 0.
on_holiday = df["date"].isin(list(holiday_dates))
df["is_holiday"] = on_holiday.astype(int)
|
| 89 |
+
|
| 90 |
+
# Add proximity to holiday features
def _holiday_distances(dates, holidays):
    """Return ``(days_to_next, days_since_last)`` for every entry of *dates*.

    ``days_to_next`` is the number of days until the first holiday strictly
    after the date; ``days_since_last`` is the number of days since the last
    holiday strictly before it. Either value is 0 when no such holiday exists.
    Both results are int64 arrays aligned with *dates*.

    Each date is located in the sorted holiday calendar with a binary search,
    replacing a per-row scan of the full holiday list.
    """
    date_values = np.asarray(dates, dtype="datetime64[ns]")
    holiday_values = np.sort(np.asarray(holidays, dtype="datetime64[ns]"))
    one_day = np.timedelta64(1, "D")
    known = ~np.isnat(date_values)  # missing dates keep the 0 default

    # side="right": position of the first holiday strictly after the date.
    next_pos = np.searchsorted(holiday_values, date_values, side="right")
    has_next = known & (next_pos < len(holiday_values))
    days_to_next = np.zeros(len(date_values), dtype="int64")
    days_to_next[has_next] = (
        holiday_values[next_pos[has_next]] - date_values[has_next]
    ) // one_day

    # side="left" minus one: position of the last holiday strictly before it.
    prev_pos = np.searchsorted(holiday_values, date_values, side="left") - 1
    has_prev = known & (prev_pos >= 0)
    days_since_last = np.zeros(len(date_values), dtype="int64")
    days_since_last[has_prev] = (
        date_values[has_prev] - holiday_values[prev_pos[has_prev]]
    ) // one_day

    return days_to_next, days_since_last


_days_to_next, _days_since_last = _holiday_distances(df["date"], holiday_dates)
df["days_to_next_holiday"] = _days_to_next
df["days_since_last_holiday"] = _days_since_last
|
| 110 |
+
|
| 111 |
+
# Position-in-month flags: first three days, and day 28 onwards.
day_in_month = df["day_of_month"]
df["is_month_start"] = day_in_month.le(3).astype(int)
df["is_month_end"] = day_in_month.ge(28).astype(int)

# Weekday flags (day_of_week: 0=Monday, 4=Friday).
weekday_number = df["day_of_week"]
df["is_friday"] = weekday_number.eq(4).astype(int)
df["is_monday"] = weekday_number.eq(0).astype(int)

# Progress through school year (normalized): elapsed days over total days,
# limited to the 0..1 range for dates outside the start/end window.
school_year_start = pd.Timestamp("2018-09-04")
school_year_end = pd.Timestamp("2019-06-26")
school_year_days = (school_year_end - school_year_start).days
days_elapsed = (df["date"] - school_year_start).dt.days
df["school_year_progress"] = (days_elapsed / school_year_days).clip(0, 1)
|
| 124 |
+
|
| 125 |
+
# Summary of what was built.
print("Feature Engineering Complete!")
print(f"Total features created: {len(df.columns)}")
print("\nNew features added:")

# Every column that is not one of the listed raw input columns, in table order.
raw_columns = {"School DBN", "Date", "Enrolled", "Absent", "Present", "Released"}
new_features = [name for name in df.columns if name not in raw_columns]
for feature in new_features:
    print(f"- {feature}")

# Preview a handful of the engineered columns.
print("\nSample of engineered features:")
preview_columns = [
    "Date",
    "attendance_rate",
    "day_of_week_name",
    "month_name",
    "season",
    "is_holiday",
    "days_to_next_holiday",
    "is_friday",
]
print(df[preview_columns].head(10))

# Save engineered dataset
df.to_csv("attendance_with_features.csv", index=False)
print("\nDataset saved as 'attendance_with_features.csv'")
|
lr_attendance/feature_info.json
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"final_features": [
|
| 3 |
+
"day_of_week",
|
| 4 |
+
"month",
|
| 5 |
+
"quarter",
|
| 6 |
+
"week_of_year",
|
| 7 |
+
"day_of_month",
|
| 8 |
+
"day_of_year",
|
| 9 |
+
"is_weekend",
|
| 10 |
+
"is_school_day",
|
| 11 |
+
"is_month_start",
|
| 12 |
+
"is_month_end",
|
| 13 |
+
"is_friday",
|
| 14 |
+
"is_monday",
|
| 15 |
+
"school_year_progress",
|
| 16 |
+
"is_holiday",
|
| 17 |
+
"days_to_next_holiday",
|
| 18 |
+
"days_since_last_holiday",
|
| 19 |
+
"temp_max",
|
| 20 |
+
"temp_min",
|
| 21 |
+
"temp_mean",
|
| 22 |
+
"temp_range",
|
| 23 |
+
"precipitation_total",
|
| 24 |
+
"rain_total",
|
| 25 |
+
"snow_total",
|
| 26 |
+
"precipitation_hours",
|
| 27 |
+
"wind_speed_max",
|
| 28 |
+
"wind_gust_max",
|
| 29 |
+
"sunshine_duration",
|
| 30 |
+
"daylight_duration",
|
| 31 |
+
"is_rainy_day",
|
| 32 |
+
"is_snowy_day",
|
| 33 |
+
"is_windy_day",
|
| 34 |
+
"is_extreme_temp",
|
| 35 |
+
"weather_severity",
|
| 36 |
+
"temp_humidity_interaction",
|
| 37 |
+
"wind_precip_interaction",
|
| 38 |
+
"holiday_weather_interaction",
|
| 39 |
+
"temp_squared",
|
| 40 |
+
"precipitation_squared",
|
| 41 |
+
"season_encoded"
|
| 42 |
+
],
|
| 43 |
+
"temporal_features": [
|
| 44 |
+
"day_of_week",
|
| 45 |
+
"month",
|
| 46 |
+
"quarter",
|
| 47 |
+
"week_of_year",
|
| 48 |
+
"day_of_month",
|
| 49 |
+
"day_of_year",
|
| 50 |
+
"is_weekend",
|
| 51 |
+
"is_school_day",
|
| 52 |
+
"is_month_start",
|
| 53 |
+
"is_month_end",
|
| 54 |
+
"is_friday",
|
| 55 |
+
"is_monday",
|
| 56 |
+
"school_year_progress"
|
| 57 |
+
],
|
| 58 |
+
"holiday_features": [
|
| 59 |
+
"is_holiday",
|
| 60 |
+
"days_to_next_holiday",
|
| 61 |
+
"days_since_last_holiday"
|
| 62 |
+
],
|
| 63 |
+
"weather_features": [
|
| 64 |
+
"temp_max",
|
| 65 |
+
"temp_min",
|
| 66 |
+
"temp_mean",
|
| 67 |
+
"temp_range",
|
| 68 |
+
"precipitation_total",
|
| 69 |
+
"rain_total",
|
| 70 |
+
"snow_total",
|
| 71 |
+
"precipitation_hours",
|
| 72 |
+
"wind_speed_max",
|
| 73 |
+
"wind_gust_max",
|
| 74 |
+
"sunshine_duration",
|
| 75 |
+
"daylight_duration",
|
| 76 |
+
"is_rainy_day",
|
| 77 |
+
"is_snowy_day",
|
| 78 |
+
"is_windy_day",
|
| 79 |
+
"is_extreme_temp",
|
| 80 |
+
"weather_severity"
|
| 81 |
+
],
|
| 82 |
+
"target_correlations": {
|
| 83 |
+
"attendance_rate": 1.0,
|
| 84 |
+
"school_year_progress": 0.15298705946434624,
|
| 85 |
+
"quarter": 0.09570330953211145,
|
| 86 |
+
"daylight_duration": 0.08974924827192297,
|
| 87 |
+
"is_snowy_day": 0.0851888567499552,
|
| 88 |
+
"days_to_next_holiday": 0.08037939994225182,
|
| 89 |
+
"month": 0.07982450259808478,
|
| 90 |
+
"week_of_year": 0.07477659348367248,
|
| 91 |
+
"day_of_year": 0.07443173468815607,
|
| 92 |
+
"day_of_month": 0.06875319490990786,
|
| 93 |
+
"snow_total": 0.06750581579691282,
|
| 94 |
+
"is_friday": 0.0642459731182472,
|
| 95 |
+
"is_holiday": 0.05859963560671229,
|
| 96 |
+
"precipitation_hours": 0.05855321374409788,
|
| 97 |
+
"is_rainy_day": 0.054753317922505866,
|
| 98 |
+
"precipitation_total": 0.0546754489544741,
|
| 99 |
+
"is_extreme_temp": 0.04877986561324521,
|
| 100 |
+
"day_of_week": 0.04745618753325818,
|
| 101 |
+
"rain_total": 0.04531486647524365,
|
| 102 |
+
"is_month_end": 0.03061269671737044,
|
| 103 |
+
"days_since_last_holiday": 0.029162742921903476,
|
| 104 |
+
"weather_severity": 0.027970448352187427,
|
| 105 |
+
"is_month_start": 0.023797389694042725,
|
| 106 |
+
"temp_mean": 0.013792886147146688,
|
| 107 |
+
"temp_min": 0.013129987102531162,
|
| 108 |
+
"temp_max": 0.012837652673607865,
|
| 109 |
+
"wind_gust_max": 0.010101066578409116,
|
| 110 |
+
"is_monday": 0.009588967551783255,
|
| 111 |
+
"sunshine_duration": 0.003997196985383076,
|
| 112 |
+
"wind_speed_max": 0.0018488325630909438,
|
| 113 |
+
"temp_range": 0.0014876996432690002,
|
| 114 |
+
"is_windy_day": 0.0009326180610937769,
|
| 115 |
+
"is_weekend": NaN,
|
| 116 |
+
"is_school_day": NaN
|
| 117 |
+
}
|
| 118 |
+
}
|
lr_attendance/final_coefficients.csv
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Feature,Coefficient
|
| 2 |
+
school_avg_attendance,7.171348715713736
|
| 3 |
+
school_year_progress,-1.5605297921826944
|
| 4 |
+
days_to_next_holiday,0.7496704390789359
|
| 5 |
+
is_snowy_day,-0.66548297262725
|
| 6 |
+
precipitation_hours,-0.4631543689630163
|
| 7 |
+
is_friday,-0.3230532695248619
|
| 8 |
+
day_of_week,-0.3176427595205633
|
| 9 |
+
temp_mean,0.2923542560616456
|
| 10 |
+
is_holiday,-0.24976999079401288
|
| 11 |
+
is_monday,-0.21064021730157015
|
| 12 |
+
is_rainy_day,0.037596682642466635
|
| 13 |
+
school_std_attendance,-0.0019548833102194414
|
lr_attendance/final_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
lr_attendance/improved_predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
lr_attendance/model_comparison.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Model,R²,RMSE,MAE
|
| 2 |
+
A: Baseline (no school),-0.19632912335295583,13.229195648314445,7.397234417574075
|
| 3 |
+
B: With School Avg ⭐,0.23948647822468483,10.547792357860521,5.784459632103281
|
| 4 |
+
C: Predict Deviation,0.256636459570448,10.428185068430691,5.835936867686833
|
| 5 |
+
D: Log Transform,0.238213937586648,10.556613295212724,5.9642873221393735
|
lr_attendance/model_summary.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
r2_train,r2_test,rmse_test,mae_test,cv_r2_mean,cv_r2_std,n_train,n_test,n_features
|
| 2 |
+
0.6090720669046676,0.609192651642787,5.943806665659979,3.1503211971515586,0.6089838172103861,0.004961794857370117,221720,55431,12
|
lr_attendance/nyc_weather_2018_2019.csv
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
date,temp_max,temp_min,temp_mean,precipitation_total,rain_total,snow_total,precipitation_hours,wind_speed_max,wind_gust_max,weather_code,sunshine_duration,daylight_duration
|
| 2 |
+
2018-09-04,31.2,23.9,27.5,0.0,0.0,0.0,0.0,10.5,21.2,3,43013.66,46669.32
|
| 3 |
+
2018-09-05,29.4,23.2,26.2,1.1,1.1,0.0,4.0,17.9,34.9,53,42916.46,46511.89
|
| 4 |
+
2018-09-06,32.3,23.6,26.8,4.6,4.6,0.0,7.0,17.6,38.5,61,33312.05,46353.66
|
| 5 |
+
2018-09-07,24.3,20.7,22.8,4.9,4.9,0.0,16.0,16.1,31.3,53,1282.37,46194.73
|
| 6 |
+
2018-09-08,21.1,15.6,18.8,6.9,6.9,0.0,12.0,18.4,36.7,55,2515.72,46035.2
|
| 7 |
+
2018-09-09,17.2,13.7,15.3,17.9,17.9,0.0,24.0,18.8,37.4,61,0.0,45875.18
|
| 8 |
+
2018-09-10,17.5,13.6,16.0,30.8,30.8,0.0,24.0,28.8,56.5,63,0.0,45714.75
|
| 9 |
+
2018-09-11,24.4,17.3,20.9,14.7,14.7,0.0,11.0,17.0,36.0,63,4967.27,45554.02
|
| 10 |
+
2018-09-12,25.5,20.6,22.8,17.3,17.3,0.0,16.0,10.8,23.4,63,3325.69,45393.1
|
| 11 |
+
2018-09-13,24.3,20.7,22.2,29.5,29.5,0.0,9.0,17.4,34.2,65,24258.51,45232.07
|
| 12 |
+
2018-09-14,23.9,20.2,21.6,0.4,0.4,0.0,4.0,15.8,32.0,51,3740.55,45071.04
|
| 13 |
+
2018-09-15,24.9,18.3,21.4,0.3,0.3,0.0,3.0,10.1,19.4,51,32685.79,44910.09
|
| 14 |
+
2018-09-16,27.4,18.1,21.7,0.0,0.0,0.0,0.0,14.1,28.4,3,34611.62,44749.33
|
| 15 |
+
2018-09-17,24.7,18.1,22.0,4.8,4.8,0.0,14.0,18.9,39.2,53,6915.09,44588.84
|
| 16 |
+
2018-09-18,25.1,20.4,22.7,20.5,20.5,0.0,14.0,21.6,38.5,63,9241.86,44428.73
|
| 17 |
+
2018-09-19,26.6,19.4,22.4,1.7,1.7,0.0,6.0,16.6,33.5,53,39495.63,44269.07
|
| 18 |
+
2018-09-20,20.8,17.8,19.4,3.2,3.2,0.0,12.0,10.9,23.8,53,11825.79,44109.98
|
| 19 |
+
2018-09-21,23.0,19.4,20.8,0.0,0.0,0.0,0.0,23.2,44.3,3,2846.96,43951.14
|
| 20 |
+
2018-09-22,22.2,16.0,20.0,2.4,2.4,0.0,3.0,23.1,46.1,61,35033.01,43791.55
|
| 21 |
+
2018-09-23,18.8,14.8,16.6,1.9,1.9,0.0,13.0,11.9,24.1,51,0.0,43631.24
|
| 22 |
+
2018-09-24,20.1,14.1,16.8,0.1,0.1,0.0,1.0,23.3,48.2,51,24689.51,43470.31
|
| 23 |
+
2018-09-25,21.8,14.3,18.9,61.4,61.4,0.0,22.0,28.1,56.2,65,0.0,43308.86
|
| 24 |
+
2018-09-26,26.3,20.6,23.1,10.9,10.9,0.0,11.0,22.5,47.2,63,35389.68,43147.0
|
| 25 |
+
2018-09-27,21.8,15.0,18.4,5.7,5.7,0.0,6.0,22.2,39.6,61,32400.0,42984.82
|
| 26 |
+
2018-09-28,19.5,13.9,16.4,29.3,29.3,0.0,9.0,27.3,63.4,63,19786.42,42822.43
|
| 27 |
+
2018-09-29,22.2,13.7,17.5,0.0,0.0,0.0,0.0,11.8,25.2,3,38584.73,42659.93
|
| 28 |
+
2018-09-30,20.2,12.2,16.3,0.0,0.0,0.0,0.0,13.6,26.3,3,38508.36,42497.42
|
| 29 |
+
2018-10-01,25.0,15.5,19.9,0.9,0.9,0.0,3.0,19.2,38.9,51,26480.82,42335.0
|
| 30 |
+
2018-10-02,24.8,17.1,20.7,32.9,32.9,0.0,11.0,16.2,34.9,65,22627.44,42172.78
|
| 31 |
+
2018-10-03,24.4,16.4,20.4,0.0,0.0,0.0,0.0,18.2,33.8,3,37493.02,42010.86
|
| 32 |
+
2018-10-04,25.8,15.2,20.5,1.8,1.8,0.0,3.0,18.9,38.2,61,34062.29,41849.33
|
| 33 |
+
2018-10-05,20.1,13.9,17.2,0.0,0.0,0.0,0.0,21.5,40.0,3,38192.4,41688.32
|
| 34 |
+
2018-10-06,19.8,17.5,18.6,0.6,0.6,0.0,4.0,19.1,36.0,51,287.69,41527.91
|
| 35 |
+
2018-10-07,26.5,19.3,22.3,0.1,0.1,0.0,1.0,16.1,29.9,51,10800.0,41368.22
|
| 36 |
+
2018-10-08,19.8,17.7,18.8,0.8,0.8,0.0,5.0,17.8,34.2,51,0.0,41209.36
|
| 37 |
+
2018-10-09,25.0,19.2,21.6,0.5,0.5,0.0,5.0,15.3,29.2,51,20834.79,41051.43
|
| 38 |
+
2018-10-10,25.5,20.5,22.2,0.6,0.6,0.0,6.0,16.9,32.4,51,27016.84,40894.55
|
| 39 |
+
2018-10-11,25.1,20.8,22.4,14.1,14.1,0.0,15.0,30.3,55.8,61,5102.43,40738.38
|
| 40 |
+
2018-10-12,21.0,10.8,15.2,6.5,6.5,0.0,5.0,32.9,60.8,63,32103.53,40581.84
|
| 41 |
+
2018-10-13,13.6,8.6,10.8,3.8,3.8,0.0,8.0,21.7,41.8,53,11341.76,40424.94
|
| 42 |
+
2018-10-14,14.6,6.8,10.9,0.0,0.0,0.0,0.0,14.6,29.5,3,23733.44,40267.82
|
| 43 |
+
2018-10-15,20.0,9.5,15.8,5.3,5.3,0.0,12.0,27.2,55.1,61,1142.64,40110.58
|
| 44 |
+
2018-10-16,15.9,8.1,11.1,0.5,0.5,0.0,2.0,29.0,52.2,51,33615.7,39953.36
|
| 45 |
+
2018-10-17,16.3,7.6,11.2,0.0,0.0,0.0,0.0,30.4,56.2,3,35979.66,39796.27
|
| 46 |
+
2018-10-18,9.6,4.7,7.0,0.0,0.0,0.0,0.0,23.6,46.1,1,36329.9,39639.44
|
| 47 |
+
2018-10-19,14.6,4.2,9.6,0.3,0.3,0.0,1.0,20.5,39.2,51,34590.27,39482.98
|
| 48 |
+
2018-10-20,17.1,9.1,13.1,6.1,6.1,0.0,7.0,23.1,47.5,63,27650.76,39327.03
|
| 49 |
+
2018-10-21,9.2,3.8,7.0,0.3,0.3,0.0,2.0,33.1,63.0,51,27771.21,39171.71
|
| 50 |
+
2018-10-22,10.6,3.5,7.2,0.0,0.0,0.0,0.0,17.1,35.3,3,31360.17,39017.16
|
| 51 |
+
2018-10-23,16.7,7.1,11.3,0.1,0.1,0.0,1.0,18.7,38.5,51,32036.72,38863.5
|
| 52 |
+
2018-10-24,13.1,6.1,9.0,0.0,0.0,0.0,0.0,30.3,56.9,3,34822.13,38710.88
|
| 53 |
+
2018-10-25,10.4,3.9,6.6,0.0,0.0,0.0,0.0,19.6,39.2,3,34795.7,38559.43
|
| 54 |
+
2018-10-26,10.1,2.5,6.5,0.2,0.2,0.0,1.0,18.0,32.8,51,32794.06,38409.29
|
| 55 |
+
2018-10-27,11.5,6.5,9.0,40.5,40.5,0.0,24.0,36.4,72.0,63,0.0,38260.59
|
| 56 |
+
2018-10-28,11.6,6.8,9.3,1.3,1.3,0.0,5.0,17.2,39.2,53,16467.77,38113.5
|
| 57 |
+
2018-10-29,13.6,7.2,10.4,2.0,2.0,0.0,7.0,23.3,44.3,53,29238.26,37968.17
|
| 58 |
+
2018-10-30,12.4,4.2,7.7,0.0,0.0,0.0,0.0,20.7,41.0,0,34033.08,37824.72
|
| 59 |
+
2018-10-31,17.0,4.4,11.2,0.0,0.0,0.0,0.0,22.5,43.2,3,32941.03,37682.86
|
| 60 |
+
2018-11-01,20.0,11.0,15.6,0.1,0.1,0.0,1.0,23.2,41.4,51,30837.66,37541.34
|
| 61 |
+
2018-11-02,20.9,17.2,19.1,10.1,10.1,0.0,19.0,29.4,62.6,61,0.0,37400.25
|
| 62 |
+
2018-11-03,18.5,7.3,13.3,8.1,8.1,0.0,7.0,33.8,79.9,61,22803.61,37259.71
|
| 63 |
+
2018-11-04,11.8,4.7,7.8,0.0,0.0,0.0,0.0,11.3,20.5,3,33370.03,37119.88
|
| 64 |
+
2018-11-05,11.4,7.1,9.8,6.2,6.2,0.0,12.0,22.0,40.3,61,0.0,36980.89
|
| 65 |
+
2018-11-06,16.6,9.4,12.2,13.6,13.6,0.0,10.0,25.8,76.0,63,0.0,36842.9
|
| 66 |
+
2018-11-07,16.8,8.2,12.8,0.0,0.0,0.0,0.0,23.1,43.9,3,32248.07,36706.06
|
| 67 |
+
2018-11-08,12.2,5.2,8.8,0.0,0.0,0.0,0.0,18.2,32.0,3,30385.53,36570.53
|
| 68 |
+
2018-11-09,10.4,4.0,7.5,14.9,14.9,0.0,9.0,24.3,47.2,63,5898.37,36436.46
|
| 69 |
+
2018-11-10,9.6,1.1,5.8,0.7,0.7,0.0,4.0,33.4,63.0,51,32483.88,36304.03
|
| 70 |
+
2018-11-11,7.6,0.1,3.2,0.0,0.0,0.0,0.0,17.4,36.4,0,32247.95,36173.38
|
| 71 |
+
2018-11-12,8.8,0.2,4.7,1.1,1.1,0.0,2.0,12.6,26.3,55,28714.71,36044.69
|
| 72 |
+
2018-11-13,10.3,5.7,7.8,18.5,18.5,0.0,12.0,25.3,50.8,63,0.0,35918.14
|
| 73 |
+
2018-11-14,5.2,-0.2,2.4,0.0,0.0,0.0,0.0,26.2,51.5,3,31707.88,35793.89
|
| 74 |
+
2018-11-15,3.9,-1.5,0.7,22.6,6.4,11.34,11.0,35.0,61.9,75,0.0,35672.12
|
| 75 |
+
2018-11-16,6.2,1.8,4.1,9.6,7.5,1.47,10.0,39.1,82.8,75,28800.0,35553.02
|
| 76 |
+
2018-11-17,8.7,0.8,4.4,0.0,0.0,0.0,0.0,20.2,42.1,3,30982.55,35436.78
|
| 77 |
+
2018-11-18,5.6,1.1,3.8,0.4,0.2,0.14,2.0,11.3,22.0,71,9379.0,35323.56
|
| 78 |
+
2018-11-19,11.6,2.7,6.5,0.0,0.0,0.0,0.0,9.5,19.1,3,20971.63,35213.56
|
| 79 |
+
2018-11-20,8.1,2.2,5.4,0.5,0.5,0.0,1.0,22.7,42.5,53,21543.86,35106.54
|
| 80 |
+
2018-11-21,7.3,-3.0,2.8,0.2,0.2,0.0,2.0,28.5,55.8,51,31189.29,35001.36
|
| 81 |
+
2018-11-22,-3.2,-7.7,-5.4,0.0,0.0,0.0,0.0,27.8,53.3,3,31141.97,34898.11
|
| 82 |
+
2018-11-23,-0.8,-8.4,-4.5,0.0,0.0,0.0,0.0,16.9,34.2,3,31044.16,34796.89
|
| 83 |
+
2018-11-24,7.9,-2.6,3.2,16.1,16.1,0.0,7.0,20.9,40.3,63,11437.53,34697.86
|
| 84 |
+
2018-11-25,12.2,4.2,7.9,22.8,22.8,0.0,6.0,33.4,59.8,65,25200.0,34601.13
|
| 85 |
+
2018-11-26,10.8,3.7,7.4,30.2,30.2,0.0,16.0,35.3,63.7,65,477.6,34506.86
|
| 86 |
+
2018-11-27,7.0,3.0,4.9,1.8,1.8,0.0,4.0,32.5,61.2,53,12559.81,34415.18
|
| 87 |
+
2018-11-28,5.2,2.0,3.5,0.0,0.0,0.0,0.0,36.6,68.8,3,17004.59,34326.23
|
| 88 |
+
2018-11-29,6.4,1.8,4.3,0.0,0.0,0.0,0.0,31.4,59.8,3,29901.48,34240.14
|
| 89 |
+
2018-11-30,4.8,-0.1,2.8,1.3,1.0,0.21,3.0,9.4,31.7,71,9364.26,34157.08
|
| 90 |
+
2018-12-01,6.3,-1.0,3.3,3.0,3.0,0.0,6.0,12.6,22.0,53,26881.31,34077.17
|
| 91 |
+
2018-12-02,14.6,5.2,10.6,16.6,16.6,0.0,21.0,18.0,35.3,63,0.0,34000.57
|
| 92 |
+
2018-12-03,12.8,6.2,10.1,0.0,0.0,0.0,0.0,26.3,48.6,3,26693.77,33927.42
|
| 93 |
+
2018-12-04,6.2,-2.0,2.0,0.0,0.0,0.0,0.0,23.0,44.6,3,29895.79,33857.86
|
| 94 |
+
2018-12-05,2.2,-3.8,-1.1,0.0,0.0,0.0,0.0,13.6,28.1,3,20367.7,33792.06
|
| 95 |
+
2018-12-06,4.2,-3.0,0.7,0.0,0.0,0.0,0.0,21.4,41.0,3,25925.27,33730.13
|
| 96 |
+
2018-12-07,2.7,-4.2,0.2,0.0,0.0,0.0,0.0,21.9,43.9,1,29926.36,33672.25
|
| 97 |
+
2018-12-08,1.9,-4.9,-1.8,0.0,0.0,0.0,0.0,19.8,39.2,3,19941.12,33618.54
|
| 98 |
+
2018-12-09,1.6,-4.4,-1.4,0.0,0.0,0.0,0.0,12.3,27.4,3,20439.16,33569.16
|
| 99 |
+
2018-12-10,3.8,-3.5,-0.7,0.0,0.0,0.0,0.0,17.2,36.0,3,29698.45,33524.03
|
| 100 |
+
2018-12-11,3.7,-3.9,-0.7,0.0,0.0,0.0,0.0,18.7,33.8,1,29673.37,33482.58
|
| 101 |
+
2018-12-12,5.1,-2.0,1.1,0.0,0.0,0.0,0.0,17.1,34.9,3,24647.14,33444.81
|
| 102 |
+
2018-12-13,3.8,-0.9,1.6,0.9,0.5,0.28,6.0,19.8,37.1,71,6975.15,33410.76
|
| 103 |
+
2018-12-14,9.9,2.8,6.3,1.4,1.4,0.0,5.0,15.5,29.9,51,6838.5,33380.45
|
| 104 |
+
2018-12-15,9.6,5.9,7.6,4.7,4.7,0.0,11.0,25.3,43.2,53,0.0,33353.91
|
| 105 |
+
2018-12-16,6.7,1.6,3.9,23.1,21.7,0.98,22.0,28.0,51.1,73,0.0,33331.19
|
| 106 |
+
2018-12-17,7.6,1.8,4.1,0.6,0.6,0.0,3.0,29.3,52.9,51,18524.88,33312.29
|
| 107 |
+
2018-12-18,2.3,-3.3,-0.4,0.0,0.0,0.0,0.0,29.8,53.6,3,29200.12,33297.25
|
| 108 |
+
2018-12-19,4.7,-4.6,-0.6,0.0,0.0,0.0,0.0,13.8,26.6,3,29139.29,33286.09
|
| 109 |
+
2018-12-20,10.6,-2.4,3.4,7.7,7.7,0.0,9.0,18.5,31.3,61,21520.87,33278.84
|
| 110 |
+
2018-12-21,14.6,10.0,12.4,43.6,43.6,0.0,16.0,38.6,82.8,65,0.0,33275.5
|
| 111 |
+
2018-12-22,9.9,3.9,6.5,3.3,3.3,0.0,3.0,31.1,60.5,61,18855.64,33276.11
|
| 112 |
+
2018-12-23,5.8,-0.2,2.7,0.0,0.0,0.0,0.0,22.9,45.0,3,28909.64,33280.67
|
| 113 |
+
2018-12-24,6.8,0.6,2.9,2.6,0.7,1.33,11.0,19.3,36.7,73,18146.82,33289.2
|
| 114 |
+
2018-12-25,4.7,-1.4,1.2,0.0,0.0,0.0,0.0,18.6,38.9,3,25764.34,33301.71
|
| 115 |
+
2018-12-26,5.8,-2.5,1.0,0.0,0.0,0.0,0.0,18.0,34.2,3,16403.13,33318.21
|
| 116 |
+
2018-12-27,4.1,0.1,1.9,0.0,0.0,0.0,0.0,16.0,28.4,3,28955.64,33338.71
|
| 117 |
+
2018-12-28,13.4,4.9,10.2,28.5,28.5,0.0,20.0,27.7,62.3,63,0.0,33363.21
|
| 118 |
+
2018-12-29,12.4,1.9,8.2,0.1,0.1,0.0,1.0,25.8,49.0,51,28989.88,33391.73
|
| 119 |
+
2018-12-30,5.2,-0.6,1.7,0.3,0.0,0.21,2.0,13.1,24.1,71,12333.29,33424.39
|
| 120 |
+
2018-12-31,7.9,-1.1,3.4,22.1,22.1,0.0,10.0,24.5,43.6,63,5883.96,33461.48
|
| 121 |
+
2019-01-01,14.2,3.2,9.4,3.1,3.1,0.0,3.0,38.0,67.7,61,18452.99,33502.93
|
| 122 |
+
2019-01-02,3.7,-0.6,1.9,0.0,0.0,0.0,0.0,15.8,28.4,3,19357.07,33548.61
|
| 123 |
+
2019-01-03,7.1,0.3,4.2,0.0,0.0,0.0,0.0,24.2,47.2,3,22127.57,33598.4
|
| 124 |
+
2019-01-04,8.0,-1.4,2.8,0.0,0.0,0.0,0.0,19.6,37.8,3,24464.69,33652.18
|
| 125 |
+
2019-01-05,6.7,3.0,5.4,20.6,20.6,0.0,18.0,22.7,42.1,63,0.0,33709.84
|
| 126 |
+
2019-01-06,8.6,-1.0,3.8,0.2,0.2,0.0,1.0,30.0,56.9,51,28503.95,33771.23
|
| 127 |
+
2019-01-07,0.2,-5.3,-2.1,0.2,0.1,0.07,2.0,17.2,32.4,71,23996.83,33836.26
|
| 128 |
+
2019-01-08,8.2,0.4,4.3,0.2,0.2,0.0,2.0,13.4,33.8,51,10552.66,33904.79
|
| 129 |
+
2019-01-09,7.1,0.3,4.4,3.6,3.6,0.0,4.0,34.6,65.2,61,25488.23,33976.69
|
| 130 |
+
2019-01-10,0.8,-2.1,-0.4,0.0,0.0,0.0,0.0,36.7,68.4,3,20963.05,34051.85
|
| 131 |
+
2019-01-11,-1.4,-5.9,-3.9,0.0,0.0,0.0,0.0,26.3,51.5,3,29951.96,34130.14
|
| 132 |
+
2019-01-12,2.3,-6.4,-2.8,0.0,0.0,0.0,0.0,10.6,20.9,3,29357.16,34211.44
|
| 133 |
+
2019-01-13,0.3,-4.1,-2.5,0.8,0.0,0.56,6.0,17.3,32.8,71,25903.73,34295.64
|
| 134 |
+
2019-01-14,1.1,-6.4,-3.7,0.0,0.0,0.0,0.0,20.3,37.1,3,30076.59,34382.6
|
| 135 |
+
2019-01-15,3.5,-4.8,-1.9,0.0,0.0,0.0,0.0,9.5,23.4,3,29494.34,34472.21
|
| 136 |
+
2019-01-16,4.6,-4.1,-0.2,0.0,0.0,0.0,0.0,22.4,43.9,3,30002.61,34564.36
|
| 137 |
+
2019-01-17,-0.2,-5.5,-2.6,0.3,0.0,0.21,1.0,18.2,33.1,73,22266.33,34658.92
|
| 138 |
+
2019-01-18,3.6,-2.2,0.3,1.4,0.0,0.98,8.0,12.3,21.6,73,13850.21,34755.77
|
| 139 |
+
2019-01-19,3.2,-3.7,-0.1,5.3,0.6,3.29,4.0,19.7,35.3,75,14658.24,34855.22
|
| 140 |
+
2019-01-20,8.8,-11.5,-0.7,29.6,27.3,1.61,13.0,31.3,61.2,75,6425.02,34958.25
|
| 141 |
+
2019-01-21,-11.9,-15.8,-13.4,0.0,0.0,0.0,0.0,33.3,62.6,3,30776.06,35064.75
|
| 142 |
+
2019-01-22,-2.2,-12.1,-7.3,0.0,0.0,0.0,0.0,19.5,36.0,3,30510.7,35174.51
|
| 143 |
+
2019-01-23,7.7,-3.5,1.8,0.1,0.1,0.0,1.0,22.9,40.7,51,11909.11,35287.37
|
| 144 |
+
2019-01-24,12.6,0.1,8.4,34.4,34.4,0.0,18.0,42.9,92.2,63,0.0,35403.12
|
| 145 |
+
2019-01-25,2.8,-3.8,-1.0,0.0,0.0,0.0,0.0,31.6,59.0,3,26179.88,35521.59
|
| 146 |
+
2019-01-26,1.0,-6.3,-2.9,0.0,0.0,0.0,0.0,13.0,29.5,3,31924.3,35642.61
|
| 147 |
+
2019-01-27,9.6,-2.7,3.0,0.0,0.0,0.0,0.0,28.8,55.4,3,28750.15,35765.98
|
| 148 |
+
2019-01-28,1.3,-5.8,-2.8,0.0,0.0,0.0,0.0,18.9,34.6,3,31577.17,35891.54
|
| 149 |
+
2019-01-29,4.3,-5.8,-0.2,5.7,5.1,0.42,12.0,22.8,40.3,73,0.0,36019.12
|
| 150 |
+
2019-01-30,0.3,-14.8,-6.6,0.4,0.0,0.28,3.0,38.4,71.6,71,26884.57,36148.55
|
| 151 |
+
2019-01-31,-8.4,-16.6,-12.5,0.0,0.0,0.0,0.0,21.3,44.6,0,32793.02,36279.68
|
| 152 |
+
2019-02-01,-4.8,-12.1,-8.8,0.0,0.0,0.0,0.0,13.2,24.1,3,27911.26,36412.32
|
| 153 |
+
2019-02-02,2.5,-9.9,-4.2,0.0,0.0,0.0,0.0,22.8,43.9,3,32403.46,36546.34
|
| 154 |
+
2019-02-03,10.0,-3.5,1.9,0.0,0.0,0.0,0.0,16.6,32.4,2,33226.9,36681.56
|
| 155 |
+
2019-02-04,11.9,-1.8,3.9,0.0,0.0,0.0,0.0,14.1,30.6,1,33373.6,36817.85
|
| 156 |
+
2019-02-05,17.6,0.7,7.5,0.0,0.0,0.0,0.0,17.4,29.9,3,31915.86,36955.05
|
| 157 |
+
2019-02-06,7.1,-1.3,3.0,9.9,9.9,0.0,6.0,17.0,31.0,63,27012.72,37093.02
|
| 158 |
+
2019-02-07,7.8,2.4,5.0,5.1,5.1,0.0,5.0,15.6,27.0,61,3400.59,37231.61
|
| 159 |
+
2019-02-08,11.2,-0.9,5.4,9.0,9.0,0.0,12.0,33.3,61.6,61,17612.41,37371.16
|
| 160 |
+
2019-02-09,0.6,-5.2,-3.0,0.0,0.0,0.0,0.0,31.8,62.3,2,34118.37,37512.93
|
| 161 |
+
2019-02-10,2.8,-6.2,-2.2,0.0,0.0,0.0,0.0,13.2,28.1,3,34271.85,37656.82
|
| 162 |
+
2019-02-11,2.8,-2.0,-0.0,0.3,0.0,0.21,3.0,11.6,23.0,71,12777.81,37802.67
|
| 163 |
+
2019-02-12,0.9,-2.8,-1.3,16.4,10.9,3.85,16.0,26.7,49.3,75,0.0,37950.3
|
| 164 |
+
2019-02-13,5.7,-1.8,2.0,2.2,2.2,0.0,4.0,34.9,63.7,53,21005.38,38099.58
|
| 165 |
+
2019-02-14,7.1,-3.0,1.5,0.0,0.0,0.0,0.0,19.6,39.2,3,34822.73,38250.33
|
| 166 |
+
2019-02-15,13.0,2.8,8.3,0.3,0.3,0.0,3.0,25.2,42.1,51,22442.48,38402.41
|
| 167 |
+
2019-02-16,5.8,-2.3,1.8,0.0,0.0,0.0,0.0,20.6,45.0,3,34997.9,38555.66
|
| 168 |
+
2019-02-17,2.3,-5.4,-1.2,1.1,1.1,0.0,3.0,14.8,31.3,53,35076.93,38709.95
|
| 169 |
+
2019-02-18,6.4,-3.8,0.9,4.0,2.7,0.91,9.0,29.0,55.1,73,10243.74,38865.12
|
| 170 |
+
2019-02-19,2.4,-5.6,-2.4,0.0,0.0,0.0,0.0,19.5,36.0,3,35339.56,39021.04
|
| 171 |
+
2019-02-20,-0.8,-4.2,-2.7,12.0,4.1,5.53,13.0,16.3,47.9,75,0.0,39177.56
|
| 172 |
+
2019-02-21,11.3,-0.1,4.4,3.7,3.7,0.0,4.0,19.8,38.9,61,35080.43,39334.55
|
| 173 |
+
2019-02-22,7.0,-0.4,3.1,0.0,0.0,0.0,0.0,17.3,34.2,3,27432.9,39491.88
|
| 174 |
+
2019-02-23,4.6,-2.5,1.1,1.5,1.5,0.0,3.0,11.9,24.8,53,20485.7,39649.42
|
| 175 |
+
2019-02-24,9.4,2.1,6.4,19.9,19.9,0.0,13.0,35.5,68.8,63,1091.64,39807.03
|
| 176 |
+
2019-02-25,5.0,-3.0,1.7,0.0,0.0,0.0,0.0,44.6,82.4,3,33542.79,39964.58
|
| 177 |
+
2019-02-26,3.0,-4.8,-2.1,0.0,0.0,0.0,0.0,25.3,50.8,3,36472.96,40121.95
|
| 178 |
+
2019-02-27,-1.7,-5.5,-3.7,0.5,0.0,0.35,4.0,18.3,36.7,71,14046.06,40279.02
|
| 179 |
+
2019-02-28,2.7,-4.2,-1.1,0.3,0.0,0.21,2.0,26.1,51.5,71,34680.77,40436.11
|
| 180 |
+
2019-03-01,2.8,-3.5,-0.6,2.1,0.0,1.47,6.0,13.3,26.3,73,7248.83,40594.44
|
| 181 |
+
2019-03-02,3.7,-1.0,1.3,9.5,0.0,6.72,9.0,18.4,33.8,75,25391.91,40753.93
|
| 182 |
+
2019-03-03,5.7,-0.9,1.9,10.0,0.3,6.79,7.0,14.5,28.1,75,28063.99,40914.49
|
| 183 |
+
2019-03-04,2.3,-5.7,-0.2,16.1,0.0,11.27,6.0,21.3,38.2,75,31227.24,41075.97
|
| 184 |
+
2019-03-05,-1.6,-14.6,-6.2,0.0,0.0,0.0,0.0,19.0,34.2,2,37628.05,41238.26
|
| 185 |
+
2019-03-06,-3.0,-9.8,-6.0,0.0,0.0,0.0,0.0,26.0,51.1,3,29823.71,41401.25
|
| 186 |
+
2019-03-07,0.6,-9.2,-4.5,0.0,0.0,0.0,0.0,26.3,51.5,3,32797.89,41564.82
|
| 187 |
+
2019-03-08,2.6,-7.2,-2.0,0.0,0.0,0.0,0.0,14.3,29.5,3,34844.63,41728.84
|
| 188 |
+
2019-03-09,8.2,-4.1,1.4,0.0,0.0,0.0,0.0,13.0,23.8,3,38169.29,41893.22
|
| 189 |
+
2019-03-10,7.8,1.1,3.6,14.0,11.1,2.03,10.0,24.2,46.8,75,0.0,42057.83
|
| 190 |
+
2019-03-11,10.6,2.0,6.3,0.0,0.0,0.0,0.0,26.0,49.7,3,38610.58,42222.57
|
| 191 |
+
2019-03-12,5.0,-1.5,1.7,0.0,0.0,0.0,0.0,25.0,49.0,3,38898.82,42387.31
|
| 192 |
+
2019-03-13,6.6,-4.9,1.7,0.0,0.0,0.0,0.0,17.2,33.5,3,33571.29,42551.96
|
| 193 |
+
2019-03-14,11.1,0.8,6.8,0.0,0.0,0.0,0.0,21.5,40.3,3,38560.82,42716.4
|
| 194 |
+
2019-03-15,21.8,9.8,15.3,20.4,20.4,0.0,7.0,29.4,53.3,65,17998.09,42880.52
|
| 195 |
+
2019-03-16,10.3,1.7,6.6,0.0,0.0,0.0,0.0,29.9,57.2,3,37108.74,43044.21
|
| 196 |
+
2019-03-17,5.9,-2.0,1.7,0.0,0.0,0.0,0.0,20.0,40.0,2,40026.87,43207.36
|
| 197 |
+
2019-03-18,7.1,-4.1,1.5,0.0,0.0,0.0,0.0,16.9,34.9,3,35062.98,43369.86
|
| 198 |
+
2019-03-19,8.0,-3.2,2.6,0.0,0.0,0.0,0.0,16.2,33.5,3,32147.55,43531.59
|
| 199 |
+
2019-03-20,9.9,-1.5,4.1,0.0,0.0,0.0,0.0,24.6,47.2,3,39737.39,43692.88
|
| 200 |
+
2019-03-21,8.6,3.0,5.9,18.3,18.3,0.0,20.0,25.9,51.8,63,0.0,43854.82
|
| 201 |
+
2019-03-22,9.3,2.7,5.8,13.2,13.2,0.0,14.0,37.7,68.8,63,13580.49,44017.36
|
| 202 |
+
2019-03-23,8.4,-0.2,3.7,0.3,0.0,0.21,3.0,34.2,62.6,71,31687.92,44180.42
|
| 203 |
+
2019-03-24,13.3,-1.4,6.5,0.0,0.0,0.0,0.0,20.8,39.2,3,28146.08,44343.9
|
| 204 |
+
2019-03-25,9.1,3.0,7.6,1.1,1.1,0.0,5.0,17.6,36.4,51,3276.7,44507.68
|
| 205 |
+
2019-03-26,7.9,-2.5,2.8,0.0,0.0,0.0,0.0,21.8,42.5,1,41493.85,44671.69
|
| 206 |
+
2019-03-27,6.4,-2.6,1.9,0.0,0.0,0.0,0.0,15.8,33.1,3,41656.98,44835.8
|
| 207 |
+
2019-03-28,8.8,-2.5,4.1,0.0,0.0,0.0,0.0,24.9,48.2,3,39910.41,44999.93
|
| 208 |
+
2019-03-29,12.6,5.0,9.1,0.8,0.8,0.0,5.0,12.2,25.6,51,921.16,45163.97
|
| 209 |
+
2019-03-30,16.2,6.8,11.4,0.0,0.0,0.0,0.0,29.7,54.7,3,33658.74,45327.82
|
| 210 |
+
2019-03-31,17.6,3.5,10.2,4.6,4.6,0.0,5.0,26.4,52.9,61,19279.57,45491.37
|
| 211 |
+
2019-04-01,7.4,-0.9,3.0,0.0,0.0,0.0,0.0,31.3,59.4,3,42467.27,45654.52
|
| 212 |
+
2019-04-02,8.7,-2.0,4.2,1.0,1.0,0.0,6.0,23.7,47.9,51,28598.75,45817.16
|
| 213 |
+
2019-04-03,16.5,2.1,9.6,0.5,0.5,0.0,3.0,38.5,71.3,51,42786.98,45979.19
|
| 214 |
+
2019-04-04,13.4,2.0,7.7,0.0,0.0,0.0,0.0,27.1,53.6,3,42945.48,46140.5
|
| 215 |
+
2019-04-05,5.3,1.5,3.6,7.3,7.3,0.0,14.0,15.7,31.0,61,1656.07,46300.98
|
| 216 |
+
2019-04-06,20.0,3.6,10.1,0.2,0.2,0.0,2.0,21.1,41.0,51,39557.13,46460.52
|
| 217 |
+
2019-04-07,16.1,4.2,10.5,0.0,0.0,0.0,0.0,18.5,38.2,3,36196.37,46619.0
|
| 218 |
+
2019-04-08,24.2,8.6,15.8,12.6,12.6,0.0,8.0,23.6,57.6,63,24721.92,46776.31
|
| 219 |
+
2019-04-09,12.2,4.4,8.6,0.9,0.9,0.0,2.0,21.0,50.0,53,36397.97,46932.74
|
| 220 |
+
2019-04-10,14.2,4.3,9.0,0.0,0.0,0.0,0.0,24.3,48.2,3,43200.0,47089.33
|
| 221 |
+
2019-04-11,10.4,2.7,6.3,0.0,0.0,0.0,0.0,14.1,28.1,3,36565.94,47246.04
|
| 222 |
+
2019-04-12,18.6,6.2,12.2,15.2,15.2,0.0,9.0,31.0,56.5,63,22271.96,47402.77
|
| 223 |
+
2019-04-13,23.0,14.1,17.5,12.6,12.6,0.0,9.0,22.5,46.8,63,22158.37,47559.43
|
| 224 |
+
2019-04-14,18.5,10.9,15.2,1.9,1.9,0.0,5.0,25.6,43.6,55,22183.88,47715.9
|
| 225 |
+
2019-04-15,18.5,7.2,13.4,15.0,15.0,0.0,10.0,37.6,81.0,63,29649.22,47872.08
|
| 226 |
+
2019-04-16,17.0,5.1,10.9,0.3,0.3,0.0,1.0,28.1,54.4,51,43705.63,48027.87
|
| 227 |
+
2019-04-17,14.9,7.8,10.4,0.4,0.4,0.0,1.0,20.8,38.2,51,36000.0,48183.14
|
| 228 |
+
2019-04-18,16.6,8.6,12.5,1.7,1.7,0.0,8.0,24.2,47.9,53,14400.0,48337.8
|
| 229 |
+
2019-04-19,23.6,15.4,18.5,5.4,5.4,0.0,10.0,32.4,58.7,63,32775.27,48491.73
|
| 230 |
+
2019-04-20,17.7,11.4,15.4,11.8,11.8,0.0,13.0,31.8,73.1,61,360.09,48644.81
|
| 231 |
+
2019-04-21,16.9,9.9,13.3,0.3,0.3,0.0,2.0,20.9,41.0,51,35473.65,48796.92
|
| 232 |
+
2019-04-22,17.0,9.5,13.4,8.1,8.1,0.0,15.0,22.9,43.9,61,14815.11,48947.95
|
| 233 |
+
2019-04-23,22.8,9.8,15.8,0.2,0.2,0.0,2.0,21.9,42.5,51,43296.33,49097.76
|
| 234 |
+
2019-04-24,19.5,10.1,16.0,0.0,0.0,0.0,0.0,20.9,43.9,3,44698.35,49246.24
|
| 235 |
+
2019-04-25,16.2,7.5,11.4,1.9,1.9,0.0,6.0,17.0,36.0,53,32403.16,49393.25
|
| 236 |
+
2019-04-26,16.6,10.2,13.4,18.9,18.9,0.0,21.0,28.9,58.3,61,5904.13,49538.66
|
| 237 |
+
2019-04-27,12.1,7.4,10.1,3.1,3.1,0.0,4.0,38.2,72.7,61,35685.02,49682.34
|
| 238 |
+
2019-04-28,10.8,5.8,8.5,1.9,1.9,0.0,6.0,18.9,36.0,55,9822.66,49824.16
|
| 239 |
+
2019-04-29,13.1,3.1,8.5,0.9,0.9,0.0,2.0,18.3,36.7,53,38090.4,49964.36
|
| 240 |
+
2019-04-30,18.9,8.6,12.1,3.7,3.7,0.0,6.0,15.0,31.7,55,13472.75,50103.94
|
| 241 |
+
2019-05-01,12.4,8.8,10.6,1.7,1.7,0.0,6.0,18.8,38.2,53,88.17,50242.86
|
| 242 |
+
2019-05-02,23.9,10.2,14.7,5.7,5.7,0.0,7.0,19.6,44.3,61,39169.56,50381.0
|
| 243 |
+
2019-05-03,14.1,9.5,11.5,1.1,1.1,0.0,4.0,12.7,25.6,51,0.0,50518.27
|
| 244 |
+
2019-05-04,20.6,10.8,14.9,9.6,9.6,0.0,11.0,8.3,19.1,61,19398.42,50654.52
|
| 245 |
+
2019-05-05,13.3,10.9,12.0,32.3,32.3,0.0,24.0,24.5,44.6,63,0.0,50789.64
|
| 246 |
+
2019-05-06,20.5,9.4,14.0,0.2,0.2,0.0,2.0,18.5,33.5,51,40686.59,50923.5
|
| 247 |
+
2019-05-07,19.1,10.2,14.4,4.3,4.3,0.0,5.0,18.9,39.6,61,45192.72,51055.99
|
| 248 |
+
2019-05-08,19.4,10.1,14.2,0.0,0.0,0.0,0.0,16.8,33.1,3,35886.93,51186.98
|
| 249 |
+
2019-05-09,15.2,9.4,12.0,0.1,0.1,0.0,1.0,21.4,42.1,51,18233.76,51316.32
|
| 250 |
+
2019-05-10,22.6,12.1,17.8,2.3,2.3,0.0,6.0,21.4,40.7,53,16369.52,51443.89
|
| 251 |
+
2019-05-11,19.5,11.2,16.0,3.9,3.9,0.0,5.0,19.3,40.0,61,39146.99,51569.55
|
| 252 |
+
2019-05-12,12.8,6.8,9.0,28.7,28.7,0.0,23.0,32.6,61.6,63,0.0,51693.18
|
| 253 |
+
2019-05-13,10.9,6.8,8.5,17.7,17.7,0.0,14.0,20.5,41.0,63,3600.0,51814.6
|
| 254 |
+
2019-05-14,11.9,6.9,9.0,5.1,5.1,0.0,12.0,16.4,30.2,53,1020.25,51933.69
|
| 255 |
+
2019-05-15,19.4,6.1,13.0,0.5,0.5,0.0,3.0,22.3,43.6,51,45486.12,52050.3
|
| 256 |
+
2019-05-16,20.6,9.6,15.7,0.8,0.8,0.0,6.0,15.4,36.4,51,48691.21,52164.29
|
| 257 |
+
2019-05-17,24.4,11.9,18.8,0.2,0.2,0.0,2.0,17.7,35.6,51,39615.34,52275.49
|
| 258 |
+
2019-05-18,22.9,11.2,16.7,0.0,0.0,0.0,0.0,16.2,28.8,3,48894.36,52383.75
|
| 259 |
+
2019-05-19,22.8,12.3,18.2,2.4,2.4,0.0,4.0,25.6,49.0,55,47441.0,52489.26
|
| 260 |
+
2019-05-20,28.0,18.7,23.1,1.7,1.7,0.0,5.0,26.5,61.9,53,47632.42,52592.85
|
| 261 |
+
2019-05-21,19.8,12.4,16.0,0.0,0.0,0.0,0.0,26.3,50.4,3,49181.6,52694.46
|
| 262 |
+
2019-05-22,22.2,10.4,16.7,0.0,0.0,0.0,0.0,16.2,32.4,3,46659.08,52794.0
|
| 263 |
+
2019-05-23,24.9,14.1,18.8,0.8,0.8,0.0,3.0,23.0,43.9,53,37003.31,52891.37
|
| 264 |
+
2019-05-24,23.2,14.6,19.6,0.3,0.3,0.0,1.0,32.8,64.1,51,47938.18,52986.44
|
| 265 |
+
2019-05-25,19.5,12.2,15.8,0.0,0.0,0.0,0.0,26.2,50.4,3,49535.76,53079.13
|
| 266 |
+
2019-05-26,29.0,15.1,21.8,3.0,3.0,0.0,5.0,20.9,39.6,61,43200.0,53169.33
|
| 267 |
+
2019-05-27,25.5,15.6,21.0,0.0,0.0,0.0,0.0,15.9,32.4,3,49656.98,53256.91
|
| 268 |
+
2019-05-28,20.8,15.9,18.4,6.9,6.9,0.0,9.0,30.8,57.6,61,15997.57,53341.79
|
| 269 |
+
2019-05-29,21.0,13.8,17.9,2.3,2.3,0.0,6.0,20.1,37.4,53,19328.21,53423.84
|
| 270 |
+
2019-05-30,20.4,13.2,16.8,14.4,14.4,0.0,9.0,17.0,42.8,63,13643.93,53502.95
|
| 271 |
+
2019-05-31,25.0,17.1,21.1,0.4,0.4,0.0,4.0,14.6,29.2,51,49836.14,53579.02
|
| 272 |
+
2019-06-01,25.9,15.6,19.9,0.0,0.0,0.0,0.0,17.4,37.4,3,49878.14,53651.92
|
| 273 |
+
2019-06-02,25.9,16.2,20.8,1.8,1.8,0.0,5.0,20.1,46.1,55,42366.16,53721.55
|
| 274 |
+
2019-06-03,20.8,11.9,16.7,0.0,0.0,0.0,0.0,25.8,50.4,2,49958.14,53787.8
|
| 275 |
+
2019-06-04,21.4,10.9,16.3,0.0,0.0,0.0,0.0,18.7,37.1,3,49995.97,53850.55
|
| 276 |
+
2019-06-05,26.2,16.2,21.0,6.4,6.4,0.0,5.0,21.2,42.8,63,29000.61,53909.7
|
| 277 |
+
2019-06-06,27.2,18.9,23.1,8.8,8.8,0.0,4.0,19.3,35.6,63,50066.87,53965.12
|
| 278 |
+
2019-06-07,25.5,16.7,20.5,0.0,0.0,0.0,0.0,16.9,33.8,3,45892.7,54016.7
|
| 279 |
+
2019-06-08,25.9,15.3,20.3,0.0,0.0,0.0,0.0,21.4,42.1,3,50131.32,54064.52
|
| 280 |
+
2019-06-09,23.7,13.6,18.5,0.0,0.0,0.0,0.0,22.1,43.9,3,50161.96,54108.94
|
| 281 |
+
2019-06-10,20.5,13.6,17.6,15.8,15.8,0.0,18.0,23.9,43.2,61,0.0,54149.97
|
| 282 |
+
2019-06-11,23.5,15.8,20.1,8.4,8.4,0.0,9.0,27.6,54.7,61,36240.15,54187.59
|
| 283 |
+
2019-06-12,22.2,13.1,17.8,0.0,0.0,0.0,0.0,21.6,42.5,3,50247.64,54221.75
|
| 284 |
+
2019-06-13,19.3,14.9,16.8,20.3,20.3,0.0,11.0,29.9,57.2,63,24408.88,54252.45
|
| 285 |
+
2019-06-14,22.6,14.3,17.9,0.3,0.3,0.0,3.0,27.9,53.3,51,50298.79,54279.64
|
| 286 |
+
2019-06-15,26.7,13.1,20.5,0.0,0.0,0.0,0.0,24.1,46.8,3,46391.4,54303.31
|
| 287 |
+
2019-06-16,26.5,17.8,22.3,4.4,4.4,0.0,13.0,19.7,45.0,53,27321.03,54323.45
|
| 288 |
+
2019-06-17,23.5,19.3,21.2,6.0,6.0,0.0,12.0,11.9,30.2,61,11254.65,54340.01
|
| 289 |
+
2019-06-18,24.7,19.2,21.0,15.0,15.0,0.0,19.0,15.2,34.9,63,2926.9,54352.99
|
| 290 |
+
2019-06-19,21.1,18.0,19.8,12.9,12.9,0.0,14.0,15.6,34.6,61,5531.7,54362.37
|
| 291 |
+
2019-06-20,26.2,19.0,22.1,10.6,10.6,0.0,15.0,15.8,39.2,63,21182.92,54368.12
|
| 292 |
+
2019-06-21,24.1,18.2,20.5,17.1,17.1,0.0,13.0,25.2,47.2,63,25238.35,54370.26
|
| 293 |
+
2019-06-22,24.2,15.8,20.4,0.0,0.0,0.0,0.0,23.1,43.9,2,50400.0,54368.75
|
| 294 |
+
2019-06-23,26.8,15.4,21.7,0.0,0.0,0.0,0.0,17.9,32.0,3,50400.0,54363.59
|
| 295 |
+
2019-06-24,28.0,16.1,22.4,0.0,0.0,0.0,0.0,11.8,22.0,3,36018.93,54354.77
|
| 296 |
+
2019-06-25,28.0,20.4,23.3,10.6,10.6,0.0,11.0,11.4,28.1,63,19588.62,54342.28
|
| 297 |
+
2019-06-26,29.8,20.4,25.0,0.0,0.0,0.0,0.0,17.3,31.0,3,50400.0,54326.11
|
lr_attendance/prepare_for_modeling.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
| 4 |
+
from sklearn.model_selection import train_test_split
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
import seaborn as sns
|
| 7 |
+
|
| 8 |
+
# Prepare the engineered attendance dataset for linear-regression modeling.
#
# The script reads "attendance_features_complete.csv", prints summary
# diagnostics (target statistics, missing values, correlations), derives
# interaction/polynomial/encoded features, splits and standardises the data,
# and writes "train_data_scaled.csv", "test_data_scaled.csv" and
# "feature_info.json" to the current working directory.
import json

# Load the complete feature dataset
df = pd.read_csv("attendance_features_complete.csv")

print("=== FEATURE ENGINEERING ANALYSIS ===")
print(f"Dataset shape: {df.shape}")
print("Target variable: attendance_rate")

# Target variable statistics
print("\n=== TARGET VARIABLE ANALYSIS ===")
print("Attendance Rate Statistics:")
print(df["attendance_rate"].describe())

# Create target categories for analysis.
# include_lowest=True keeps a rate of exactly 0 in the "Poor" bin; without it
# the first interval is (0, 85] and such rows end up with a NaN category.
df["attendance_category"] = pd.cut(
    df["attendance_rate"],
    bins=[0, 85, 92, 95, 100],
    labels=["Poor", "Average", "Good", "Excellent"],
    include_lowest=True,
)

print("\nAttendance Categories:")
print(df["attendance_category"].value_counts())

# Feature list by category
temporal_features = [
    "day_of_week",
    "month",
    "quarter",
    "week_of_year",
    "day_of_month",
    "day_of_year",
    "is_weekend",
    "is_school_day",
    "is_month_start",
    "is_month_end",
    "is_friday",
    "is_monday",
    "school_year_progress",
]

holiday_features = ["is_holiday", "days_to_next_holiday", "days_since_last_holiday"]

weather_features = [
    "temp_max",
    "temp_min",
    "temp_mean",
    "temp_range",
    "precipitation_total",
    "rain_total",
    "snow_total",
    "precipitation_hours",
    "wind_speed_max",
    "wind_gust_max",
    "sunshine_duration",
    "daylight_duration",
    "is_rainy_day",
    "is_snowy_day",
    "is_windy_day",
    "is_extreme_temp",
    "weather_severity",
]

# School-level features (reported below, but not part of the model inputs)
school_features = ["School DBN"]

# Target variable
target = "attendance_rate"

print("\n=== FEATURE CATEGORIES ===")
print(f"Temporal features: {len(temporal_features)}")
print(f"Holiday features: {len(holiday_features)}")
print(f"Weather features: {len(weather_features)}")
print(f"School features: {len(school_features)}")

# Check for missing values
print("\n=== MISSING VALUES ANALYSIS ===")
all_features = temporal_features + holiday_features + weather_features
missing_analysis = df[all_features + [target]].isnull().sum()
print(missing_analysis[missing_analysis > 0])

# Correlation analysis (numeric columns only)
print("\n=== CORRELATION ANALYSIS ===")
numeric_features = df[all_features + [target]].select_dtypes(include=[np.number])
correlation_matrix = numeric_features.corr()

# Absolute correlation of every numeric column with the target, strongest
# first.  This series still contains the target's own entry (1.0) and is what
# gets written to feature_info.json.
target_correlations = correlation_matrix[target].abs().sort_values(ascending=False)

# Same ranking without the target itself, so that the self-correlation is
# never reported as a "feature" in the lists and counts below.
feature_correlations = target_correlations.drop(labels=target)

print("Top 15 features correlated with attendance rate:")
print(feature_correlations.head(15))

# Feature importance for linear regression (high correlation features)
high_corr_features = feature_correlations[feature_correlations > 0.1].index.tolist()
print(f"\nFeatures with correlation > 0.1: {len(high_corr_features)}")
print(high_corr_features)

# Prepare data for modeling
print("\n=== DATA PREPARATION FOR MODELING ===")

# Handle missing values in weather features: fill gaps with the column median.
# NOTE(review): the medians are computed on the full dataset, i.e. before the
# train/test split below.
df_clean = df.copy()
for feature in weather_features:
    if df_clean[feature].isnull().any():
        df_clean[feature] = df_clean[feature].fillna(df_clean[feature].median())

# Create interaction features.
# NOTE: despite its name, "temp_humidity_interaction" is mean temperature
# times total precipitation; the column name is kept because it is written to
# the output files.
df_clean["temp_humidity_interaction"] = (
    df_clean["temp_mean"] * df_clean["precipitation_total"]
)
df_clean["wind_precip_interaction"] = (
    df_clean["wind_speed_max"] * df_clean["precipitation_total"]
)
df_clean["holiday_weather_interaction"] = (
    df_clean["is_holiday"] * df_clean["weather_severity"]
)

# Polynomial features for important continuous variables
df_clean["temp_squared"] = df_clean["temp_mean"] ** 2
df_clean["precipitation_squared"] = df_clean["precipitation_total"] ** 2

# Encoding categorical features: "season" becomes integer codes
le = LabelEncoder()
df_clean["season_encoded"] = le.fit_transform(df_clean["season"])

# Final feature list
final_features = (
    temporal_features
    + holiday_features
    + weather_features
    + [
        "temp_humidity_interaction",
        "wind_precip_interaction",
        "holiday_weather_interaction",
        "temp_squared",
        "precipitation_squared",
        "season_encoded",
    ]
)

# Remove any remaining non-numeric or problematic features
final_features = [
    f for f in final_features if f in df_clean.columns and df_clean[f].dtype != "object"
]

print(f"Final feature count for modeling: {len(final_features)}")

# Split data into inputs and target
X = df_clean[final_features]
y = df_clean[target]

# Remove rows with missing target
mask = ~y.isnull()
X = X[mask]
y = y[mask]

print(f"Final dataset shape for modeling: {X.shape}")

# Train-test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

# Feature scaling: fit on the training split only, then apply to both splits
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save prepared datasets (scaled features plus the unscaled target column)
train_data = pd.DataFrame(X_train_scaled, columns=final_features)
train_data["attendance_rate"] = y_train.values

test_data = pd.DataFrame(X_test_scaled, columns=final_features)
test_data["attendance_rate"] = y_test.values

train_data.to_csv("train_data_scaled.csv", index=False)
test_data.to_csv("test_data_scaled.csv", index=False)

# Save feature information.
# NOTE(review): a zero-variance column yields a NaN correlation, which
# json.dump writes as the non-standard token NaN; confirm that downstream
# readers of feature_info.json accept it.
feature_info = {
    "final_features": final_features,
    "temporal_features": temporal_features,
    "holiday_features": holiday_features,
    "weather_features": weather_features,
    "target_correlations": target_correlations.to_dict(),
}

with open("feature_info.json", "w", encoding="utf-8") as f:
    json.dump(feature_info, f, indent=2)

print("\n=== DATASETS SAVED ===")
print("v train_data_scaled.csv - Training data with scaled features")
print("v test_data_scaled.csv - Test data with scaled features")
print("v feature_info.json - Feature metadata and correlations")

print("\n=== FEATURE ENGINEERING SUMMARY ===")
print(f"v Enhanced date column with {len(temporal_features)} temporal features")
print(f"v Added {len(holiday_features)} holiday-related features")
print(f"v Integrated {len(weather_features)} weather features")
print("v Created interaction and polynomial features")
print("v Final dataset ready for multiple linear regression")
print(f"v Average attendance rate: {df['attendance_rate'].mean():.2f}%")
print(f"v Features most correlated with attendance: {high_corr_features[:5]}")
|
lr_attendance/train.ipynb
ADDED
|
@@ -0,0 +1,1140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 15,
|
| 6 |
+
"id": "ab55b6f5-72a8-42bb-ae90-53ec5bb79501",
|
| 7 |
+
"metadata": {
|
| 8 |
+
"scrolled": true
|
| 9 |
+
},
|
| 10 |
+
"outputs": [
|
| 11 |
+
{
|
| 12 |
+
"name": "stdout",
|
| 13 |
+
"output_type": "stream",
|
| 14 |
+
"text": [
|
| 15 |
+
"School DBN 0\n",
|
| 16 |
+
"Date 0\n",
|
| 17 |
+
"Enrolled 0\n",
|
| 18 |
+
"Absent 0\n",
|
| 19 |
+
"Present 0\n",
|
| 20 |
+
"Released 0\n",
|
| 21 |
+
"dtype: int64\n"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"data": {
|
| 26 |
+
"text/html": [
|
| 27 |
+
"<div>\n",
|
| 28 |
+
"<style scoped>\n",
|
| 29 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 30 |
+
" vertical-align: middle;\n",
|
| 31 |
+
" }\n",
|
| 32 |
+
"\n",
|
| 33 |
+
" .dataframe tbody tr th {\n",
|
| 34 |
+
" vertical-align: top;\n",
|
| 35 |
+
" }\n",
|
| 36 |
+
"\n",
|
| 37 |
+
" .dataframe thead th {\n",
|
| 38 |
+
" text-align: right;\n",
|
| 39 |
+
" }\n",
|
| 40 |
+
"</style>\n",
|
| 41 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 42 |
+
" <thead>\n",
|
| 43 |
+
" <tr style=\"text-align: right;\">\n",
|
| 44 |
+
" <th></th>\n",
|
| 45 |
+
" <th>Date</th>\n",
|
| 46 |
+
" <th>Enrolled</th>\n",
|
| 47 |
+
" <th>Absent</th>\n",
|
| 48 |
+
" <th>Present</th>\n",
|
| 49 |
+
" <th>Released</th>\n",
|
| 50 |
+
" </tr>\n",
|
| 51 |
+
" </thead>\n",
|
| 52 |
+
" <tbody>\n",
|
| 53 |
+
" <tr>\n",
|
| 54 |
+
" <th>count</th>\n",
|
| 55 |
+
" <td>2.771530e+05</td>\n",
|
| 56 |
+
" <td>277153.00000</td>\n",
|
| 57 |
+
" <td>277153.000000</td>\n",
|
| 58 |
+
" <td>277153.000000</td>\n",
|
| 59 |
+
" <td>277153.000000</td>\n",
|
| 60 |
+
" </tr>\n",
|
| 61 |
+
" <tr>\n",
|
| 62 |
+
" <th>mean</th>\n",
|
| 63 |
+
" <td>2.018665e+07</td>\n",
|
| 64 |
+
" <td>596.98617</td>\n",
|
| 65 |
+
" <td>50.503538</td>\n",
|
| 66 |
+
" <td>544.499403</td>\n",
|
| 67 |
+
" <td>1.983229</td>\n",
|
| 68 |
+
" </tr>\n",
|
| 69 |
+
" <tr>\n",
|
| 70 |
+
" <th>std</th>\n",
|
| 71 |
+
" <td>4.555413e+03</td>\n",
|
| 72 |
+
" <td>482.90966</td>\n",
|
| 73 |
+
" <td>54.329671</td>\n",
|
| 74 |
+
" <td>452.970313</td>\n",
|
| 75 |
+
" <td>35.114511</td>\n",
|
| 76 |
+
" </tr>\n",
|
| 77 |
+
" <tr>\n",
|
| 78 |
+
" <th>min</th>\n",
|
| 79 |
+
" <td>2.018090e+07</td>\n",
|
| 80 |
+
" <td>1.00000</td>\n",
|
| 81 |
+
" <td>0.000000</td>\n",
|
| 82 |
+
" <td>1.000000</td>\n",
|
| 83 |
+
" <td>0.000000</td>\n",
|
| 84 |
+
" </tr>\n",
|
| 85 |
+
" <tr>\n",
|
| 86 |
+
" <th>25%</th>\n",
|
| 87 |
+
" <td>2.018111e+07</td>\n",
|
| 88 |
+
" <td>329.00000</td>\n",
|
| 89 |
+
" <td>23.000000</td>\n",
|
| 90 |
+
" <td>291.000000</td>\n",
|
| 91 |
+
" <td>0.000000</td>\n",
|
| 92 |
+
" </tr>\n",
|
| 93 |
+
" <tr>\n",
|
| 94 |
+
" <th>50%</th>\n",
|
| 95 |
+
" <td>2.019013e+07</td>\n",
|
| 96 |
+
" <td>476.00000</td>\n",
|
| 97 |
+
" <td>38.000000</td>\n",
|
| 98 |
+
" <td>430.000000</td>\n",
|
| 99 |
+
" <td>0.000000</td>\n",
|
| 100 |
+
" </tr>\n",
|
| 101 |
+
" <tr>\n",
|
| 102 |
+
" <th>75%</th>\n",
|
| 103 |
+
" <td>2.019041e+07</td>\n",
|
| 104 |
+
" <td>684.00000</td>\n",
|
| 105 |
+
" <td>59.000000</td>\n",
|
| 106 |
+
" <td>640.000000</td>\n",
|
| 107 |
+
" <td>0.000000</td>\n",
|
| 108 |
+
" </tr>\n",
|
| 109 |
+
" <tr>\n",
|
| 110 |
+
" <th>max</th>\n",
|
| 111 |
+
" <td>2.019063e+07</td>\n",
|
| 112 |
+
" <td>5955.00000</td>\n",
|
| 113 |
+
" <td>2151.000000</td>\n",
|
| 114 |
+
" <td>5847.000000</td>\n",
|
| 115 |
+
" <td>5904.000000</td>\n",
|
| 116 |
+
" </tr>\n",
|
| 117 |
+
" </tbody>\n",
|
| 118 |
+
"</table>\n",
|
| 119 |
+
"</div>"
|
| 120 |
+
],
|
| 121 |
+
"text/plain": [
|
| 122 |
+
" Date Enrolled Absent Present Released\n",
|
| 123 |
+
"count 2.771530e+05 277153.00000 277153.000000 277153.000000 277153.000000\n",
|
| 124 |
+
"mean 2.018665e+07 596.98617 50.503538 544.499403 1.983229\n",
|
| 125 |
+
"std 4.555413e+03 482.90966 54.329671 452.970313 35.114511\n",
|
| 126 |
+
"min 2.018090e+07 1.00000 0.000000 1.000000 0.000000\n",
|
| 127 |
+
"25% 2.018111e+07 329.00000 23.000000 291.000000 0.000000\n",
|
| 128 |
+
"50% 2.019013e+07 476.00000 38.000000 430.000000 0.000000\n",
|
| 129 |
+
"75% 2.019041e+07 684.00000 59.000000 640.000000 0.000000\n",
|
| 130 |
+
"max 2.019063e+07 5955.00000 2151.000000 5847.000000 5904.000000"
|
| 131 |
+
]
|
| 132 |
+
},
|
| 133 |
+
"execution_count": 15,
|
| 134 |
+
"metadata": {},
|
| 135 |
+
"output_type": "execute_result"
|
| 136 |
+
}
|
| 137 |
+
],
|
| 138 |
+
"source": [
|
| 139 |
+
"# import lib\n",
|
| 140 |
+
"import pandas as pd\n",
|
| 141 |
+
"from sklearn.model_selection import train_test_split\n",
|
| 142 |
+
"from sklearn.linear_model import LinearRegression\n",
|
| 143 |
+
"from sklearn.metrics import mean_absolute_error ,r2_score\n",
|
| 144 |
+
"from sklearn.linear_model import Ridge\n",
|
| 145 |
+
"import matplotlib.pyplot as plt\n",
|
| 146 |
+
"#data clean\n",
|
| 147 |
+
"\n",
|
| 148 |
+
"df = pd.read_csv(\"DailyPresence.csv\")\n",
|
| 149 |
+
"print(df.isnull().sum())\n",
|
| 150 |
+
"df.describe()\n"
|
| 151 |
+
]
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"cell_type": "code",
|
| 155 |
+
"execution_count": 16,
|
| 156 |
+
"id": "b403a040-92fc-440e-a0fc-4d7d5395c2f2",
|
| 157 |
+
"metadata": {},
|
| 158 |
+
"outputs": [],
|
| 159 |
+
"source": [
|
| 160 |
+
"# creating the variables \n",
|
| 161 |
+
"x = df[['Enrolled']]\n",
|
| 162 |
+
"y = df['Present']\n",
|
| 163 |
+
"x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)\n"
|
| 164 |
+
]
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"cell_type": "code",
|
| 168 |
+
"execution_count": 17,
|
| 169 |
+
"id": "240452cf-351a-48e1-8f65-9052c8700094",
|
| 170 |
+
"metadata": {},
|
| 171 |
+
"outputs": [
|
| 172 |
+
{
|
| 173 |
+
"data": {
|
| 174 |
+
"text/html": [
|
| 175 |
+
"<style>#sk-container-id-4 {\n",
|
| 176 |
+
" /* Definition of color scheme common for light and dark mode */\n",
|
| 177 |
+
" --sklearn-color-text: #000;\n",
|
| 178 |
+
" --sklearn-color-text-muted: #666;\n",
|
| 179 |
+
" --sklearn-color-line: gray;\n",
|
| 180 |
+
" /* Definition of color scheme for unfitted estimators */\n",
|
| 181 |
+
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
|
| 182 |
+
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
|
| 183 |
+
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
|
| 184 |
+
" --sklearn-color-unfitted-level-3: chocolate;\n",
|
| 185 |
+
" /* Definition of color scheme for fitted estimators */\n",
|
| 186 |
+
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
|
| 187 |
+
" --sklearn-color-fitted-level-1: #d4ebff;\n",
|
| 188 |
+
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
|
| 189 |
+
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
|
| 190 |
+
"}\n",
|
| 191 |
+
"\n",
|
| 192 |
+
"#sk-container-id-4.light {\n",
|
| 193 |
+
" /* Specific color for light theme */\n",
|
| 194 |
+
" --sklearn-color-text-on-default-background: black;\n",
|
| 195 |
+
" --sklearn-color-background: white;\n",
|
| 196 |
+
" --sklearn-color-border-box: black;\n",
|
| 197 |
+
" --sklearn-color-icon: #696969;\n",
|
| 198 |
+
"}\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"#sk-container-id-4.dark {\n",
|
| 201 |
+
" --sklearn-color-text-on-default-background: white;\n",
|
| 202 |
+
" --sklearn-color-background: #111;\n",
|
| 203 |
+
" --sklearn-color-border-box: white;\n",
|
| 204 |
+
" --sklearn-color-icon: #878787;\n",
|
| 205 |
+
"}\n",
|
| 206 |
+
"\n",
|
| 207 |
+
"#sk-container-id-4 {\n",
|
| 208 |
+
" color: var(--sklearn-color-text);\n",
|
| 209 |
+
"}\n",
|
| 210 |
+
"\n",
|
| 211 |
+
"#sk-container-id-4 pre {\n",
|
| 212 |
+
" padding: 0;\n",
|
| 213 |
+
"}\n",
|
| 214 |
+
"\n",
|
| 215 |
+
"#sk-container-id-4 input.sk-hidden--visually {\n",
|
| 216 |
+
" border: 0;\n",
|
| 217 |
+
" clip: rect(1px 1px 1px 1px);\n",
|
| 218 |
+
" clip: rect(1px, 1px, 1px, 1px);\n",
|
| 219 |
+
" height: 1px;\n",
|
| 220 |
+
" margin: -1px;\n",
|
| 221 |
+
" overflow: hidden;\n",
|
| 222 |
+
" padding: 0;\n",
|
| 223 |
+
" position: absolute;\n",
|
| 224 |
+
" width: 1px;\n",
|
| 225 |
+
"}\n",
|
| 226 |
+
"\n",
|
| 227 |
+
"#sk-container-id-4 div.sk-dashed-wrapped {\n",
|
| 228 |
+
" border: 1px dashed var(--sklearn-color-line);\n",
|
| 229 |
+
" margin: 0 0.4em 0.5em 0.4em;\n",
|
| 230 |
+
" box-sizing: border-box;\n",
|
| 231 |
+
" padding-bottom: 0.4em;\n",
|
| 232 |
+
" background-color: var(--sklearn-color-background);\n",
|
| 233 |
+
"}\n",
|
| 234 |
+
"\n",
|
| 235 |
+
"#sk-container-id-4 div.sk-container {\n",
|
| 236 |
+
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
|
| 237 |
+
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
|
| 238 |
+
" so we also need the `!important` here to be able to override the\n",
|
| 239 |
+
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
|
| 240 |
+
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
|
| 241 |
+
" display: inline-block !important;\n",
|
| 242 |
+
" position: relative;\n",
|
| 243 |
+
"}\n",
|
| 244 |
+
"\n",
|
| 245 |
+
"#sk-container-id-4 div.sk-text-repr-fallback {\n",
|
| 246 |
+
" display: none;\n",
|
| 247 |
+
"}\n",
|
| 248 |
+
"\n",
|
| 249 |
+
"div.sk-parallel-item,\n",
|
| 250 |
+
"div.sk-serial,\n",
|
| 251 |
+
"div.sk-item {\n",
|
| 252 |
+
" /* draw centered vertical line to link estimators */\n",
|
| 253 |
+
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
|
| 254 |
+
" background-size: 2px 100%;\n",
|
| 255 |
+
" background-repeat: no-repeat;\n",
|
| 256 |
+
" background-position: center center;\n",
|
| 257 |
+
"}\n",
|
| 258 |
+
"\n",
|
| 259 |
+
"/* Parallel-specific style estimator block */\n",
|
| 260 |
+
"\n",
|
| 261 |
+
"#sk-container-id-4 div.sk-parallel-item::after {\n",
|
| 262 |
+
" content: \"\";\n",
|
| 263 |
+
" width: 100%;\n",
|
| 264 |
+
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
|
| 265 |
+
" flex-grow: 1;\n",
|
| 266 |
+
"}\n",
|
| 267 |
+
"\n",
|
| 268 |
+
"#sk-container-id-4 div.sk-parallel {\n",
|
| 269 |
+
" display: flex;\n",
|
| 270 |
+
" align-items: stretch;\n",
|
| 271 |
+
" justify-content: center;\n",
|
| 272 |
+
" background-color: var(--sklearn-color-background);\n",
|
| 273 |
+
" position: relative;\n",
|
| 274 |
+
"}\n",
|
| 275 |
+
"\n",
|
| 276 |
+
"#sk-container-id-4 div.sk-parallel-item {\n",
|
| 277 |
+
" display: flex;\n",
|
| 278 |
+
" flex-direction: column;\n",
|
| 279 |
+
"}\n",
|
| 280 |
+
"\n",
|
| 281 |
+
"#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
|
| 282 |
+
" align-self: flex-end;\n",
|
| 283 |
+
" width: 50%;\n",
|
| 284 |
+
"}\n",
|
| 285 |
+
"\n",
|
| 286 |
+
"#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
|
| 287 |
+
" align-self: flex-start;\n",
|
| 288 |
+
" width: 50%;\n",
|
| 289 |
+
"}\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
|
| 292 |
+
" width: 0;\n",
|
| 293 |
+
"}\n",
|
| 294 |
+
"\n",
|
| 295 |
+
"/* Serial-specific style estimator block */\n",
|
| 296 |
+
"\n",
|
| 297 |
+
"#sk-container-id-4 div.sk-serial {\n",
|
| 298 |
+
" display: flex;\n",
|
| 299 |
+
" flex-direction: column;\n",
|
| 300 |
+
" align-items: center;\n",
|
| 301 |
+
" background-color: var(--sklearn-color-background);\n",
|
| 302 |
+
" padding-right: 1em;\n",
|
| 303 |
+
" padding-left: 1em;\n",
|
| 304 |
+
"}\n",
|
| 305 |
+
"\n",
|
| 306 |
+
"\n",
|
| 307 |
+
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
|
| 308 |
+
"clickable and can be expanded/collapsed.\n",
|
| 309 |
+
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
|
| 310 |
+
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
|
| 311 |
+
"*/\n",
|
| 312 |
+
"\n",
|
| 313 |
+
"/* Pipeline and ColumnTransformer style (default) */\n",
|
| 314 |
+
"\n",
|
| 315 |
+
"#sk-container-id-4 div.sk-toggleable {\n",
|
| 316 |
+
" /* Default theme specific background. It is overwritten whether we have a\n",
|
| 317 |
+
" specific estimator or a Pipeline/ColumnTransformer */\n",
|
| 318 |
+
" background-color: var(--sklearn-color-background);\n",
|
| 319 |
+
"}\n",
|
| 320 |
+
"\n",
|
| 321 |
+
"/* Toggleable label */\n",
|
| 322 |
+
"#sk-container-id-4 label.sk-toggleable__label {\n",
|
| 323 |
+
" cursor: pointer;\n",
|
| 324 |
+
" display: flex;\n",
|
| 325 |
+
" width: 100%;\n",
|
| 326 |
+
" margin-bottom: 0;\n",
|
| 327 |
+
" padding: 0.5em;\n",
|
| 328 |
+
" box-sizing: border-box;\n",
|
| 329 |
+
" text-align: center;\n",
|
| 330 |
+
" align-items: center;\n",
|
| 331 |
+
" justify-content: center;\n",
|
| 332 |
+
" gap: 0.5em;\n",
|
| 333 |
+
"}\n",
|
| 334 |
+
"\n",
|
| 335 |
+
"#sk-container-id-4 label.sk-toggleable__label .caption {\n",
|
| 336 |
+
" font-size: 0.6rem;\n",
|
| 337 |
+
" font-weight: lighter;\n",
|
| 338 |
+
" color: var(--sklearn-color-text-muted);\n",
|
| 339 |
+
"}\n",
|
| 340 |
+
"\n",
|
| 341 |
+
"#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
|
| 342 |
+
" /* Arrow on the left of the label */\n",
|
| 343 |
+
" content: \"▸\";\n",
|
| 344 |
+
" float: left;\n",
|
| 345 |
+
" margin-right: 0.25em;\n",
|
| 346 |
+
" color: var(--sklearn-color-icon);\n",
|
| 347 |
+
"}\n",
|
| 348 |
+
"\n",
|
| 349 |
+
"#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
|
| 350 |
+
" color: var(--sklearn-color-text);\n",
|
| 351 |
+
"}\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"/* Toggleable content - dropdown */\n",
|
| 354 |
+
"\n",
|
| 355 |
+
"#sk-container-id-4 div.sk-toggleable__content {\n",
|
| 356 |
+
" display: none;\n",
|
| 357 |
+
" text-align: left;\n",
|
| 358 |
+
" /* unfitted */\n",
|
| 359 |
+
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
| 360 |
+
"}\n",
|
| 361 |
+
"\n",
|
| 362 |
+
"#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
|
| 363 |
+
" /* fitted */\n",
|
| 364 |
+
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
| 365 |
+
"}\n",
|
| 366 |
+
"\n",
|
| 367 |
+
"#sk-container-id-4 div.sk-toggleable__content pre {\n",
|
| 368 |
+
" margin: 0.2em;\n",
|
| 369 |
+
" border-radius: 0.25em;\n",
|
| 370 |
+
" color: var(--sklearn-color-text);\n",
|
| 371 |
+
" /* unfitted */\n",
|
| 372 |
+
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
| 373 |
+
"}\n",
|
| 374 |
+
"\n",
|
| 375 |
+
"#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
|
| 376 |
+
" /* unfitted */\n",
|
| 377 |
+
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
| 378 |
+
"}\n",
|
| 379 |
+
"\n",
|
| 380 |
+
"#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
|
| 381 |
+
" /* Expand drop-down */\n",
|
| 382 |
+
" display: block;\n",
|
| 383 |
+
" width: 100%;\n",
|
| 384 |
+
" overflow: visible;\n",
|
| 385 |
+
"}\n",
|
| 386 |
+
"\n",
|
| 387 |
+
"#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
|
| 388 |
+
" content: \"▾\";\n",
|
| 389 |
+
"}\n",
|
| 390 |
+
"\n",
|
| 391 |
+
"/* Pipeline/ColumnTransformer-specific style */\n",
|
| 392 |
+
"\n",
|
| 393 |
+
"#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
| 394 |
+
" color: var(--sklearn-color-text);\n",
|
| 395 |
+
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
| 396 |
+
"}\n",
|
| 397 |
+
"\n",
|
| 398 |
+
"#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
| 399 |
+
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
| 400 |
+
"}\n",
|
| 401 |
+
"\n",
|
| 402 |
+
"/* Estimator-specific style */\n",
|
| 403 |
+
"\n",
|
| 404 |
+
"/* Colorize estimator box */\n",
|
| 405 |
+
"#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
| 406 |
+
" /* unfitted */\n",
|
| 407 |
+
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
| 408 |
+
"}\n",
|
| 409 |
+
"\n",
|
| 410 |
+
"#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
|
| 411 |
+
" /* fitted */\n",
|
| 412 |
+
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
| 413 |
+
"}\n",
|
| 414 |
+
"\n",
|
| 415 |
+
"#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
|
| 416 |
+
"#sk-container-id-4 div.sk-label label {\n",
|
| 417 |
+
" /* The background is the default theme color */\n",
|
| 418 |
+
" color: var(--sklearn-color-text-on-default-background);\n",
|
| 419 |
+
"}\n",
|
| 420 |
+
"\n",
|
| 421 |
+
"/* On hover, darken the color of the background */\n",
|
| 422 |
+
"#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
|
| 423 |
+
" color: var(--sklearn-color-text);\n",
|
| 424 |
+
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
| 425 |
+
"}\n",
|
| 426 |
+
"\n",
|
| 427 |
+
"/* Label box, darken color on hover, fitted */\n",
|
| 428 |
+
"#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
|
| 429 |
+
" color: var(--sklearn-color-text);\n",
|
| 430 |
+
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
| 431 |
+
"}\n",
|
| 432 |
+
"\n",
|
| 433 |
+
"/* Estimator label */\n",
|
| 434 |
+
"\n",
|
| 435 |
+
"#sk-container-id-4 div.sk-label label {\n",
|
| 436 |
+
" font-family: monospace;\n",
|
| 437 |
+
" font-weight: bold;\n",
|
| 438 |
+
" line-height: 1.2em;\n",
|
| 439 |
+
"}\n",
|
| 440 |
+
"\n",
|
| 441 |
+
"#sk-container-id-4 div.sk-label-container {\n",
|
| 442 |
+
" text-align: center;\n",
|
| 443 |
+
"}\n",
|
| 444 |
+
"\n",
|
| 445 |
+
"/* Estimator-specific */\n",
|
| 446 |
+
"#sk-container-id-4 div.sk-estimator {\n",
|
| 447 |
+
" font-family: monospace;\n",
|
| 448 |
+
" border: 1px dotted var(--sklearn-color-border-box);\n",
|
| 449 |
+
" border-radius: 0.25em;\n",
|
| 450 |
+
" box-sizing: border-box;\n",
|
| 451 |
+
" margin-bottom: 0.5em;\n",
|
| 452 |
+
" /* unfitted */\n",
|
| 453 |
+
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
| 454 |
+
"}\n",
|
| 455 |
+
"\n",
|
| 456 |
+
"#sk-container-id-4 div.sk-estimator.fitted {\n",
|
| 457 |
+
" /* fitted */\n",
|
| 458 |
+
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
| 459 |
+
"}\n",
|
| 460 |
+
"\n",
|
| 461 |
+
"/* on hover */\n",
|
| 462 |
+
"#sk-container-id-4 div.sk-estimator:hover {\n",
|
| 463 |
+
" /* unfitted */\n",
|
| 464 |
+
" background-color: var(--sklearn-color-unfitted-level-2);\n",
|
| 465 |
+
"}\n",
|
| 466 |
+
"\n",
|
| 467 |
+
"#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
|
| 468 |
+
" /* fitted */\n",
|
| 469 |
+
" background-color: var(--sklearn-color-fitted-level-2);\n",
|
| 470 |
+
"}\n",
|
| 471 |
+
"\n",
|
| 472 |
+
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
|
| 473 |
+
"\n",
|
| 474 |
+
"/* Common style for \"i\" and \"?\" */\n",
|
| 475 |
+
"\n",
|
| 476 |
+
".sk-estimator-doc-link,\n",
|
| 477 |
+
"a:link.sk-estimator-doc-link,\n",
|
| 478 |
+
"a:visited.sk-estimator-doc-link {\n",
|
| 479 |
+
" float: right;\n",
|
| 480 |
+
" font-size: smaller;\n",
|
| 481 |
+
" line-height: 1em;\n",
|
| 482 |
+
" font-family: monospace;\n",
|
| 483 |
+
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
| 484 |
+
" border-radius: 1em;\n",
|
| 485 |
+
" height: 1em;\n",
|
| 486 |
+
" width: 1em;\n",
|
| 487 |
+
" text-decoration: none !important;\n",
|
| 488 |
+
" margin-left: 0.5em;\n",
|
| 489 |
+
" text-align: center;\n",
|
| 490 |
+
" /* unfitted */\n",
|
| 491 |
+
" border: var(--sklearn-color-unfitted-level-3) 1pt solid;\n",
|
| 492 |
+
" color: var(--sklearn-color-unfitted-level-3);\n",
|
| 493 |
+
"}\n",
|
| 494 |
+
"\n",
|
| 495 |
+
".sk-estimator-doc-link.fitted,\n",
|
| 496 |
+
"a:link.sk-estimator-doc-link.fitted,\n",
|
| 497 |
+
"a:visited.sk-estimator-doc-link.fitted {\n",
|
| 498 |
+
" /* fitted */\n",
|
| 499 |
+
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
| 500 |
+
" border: var(--sklearn-color-fitted-level-3) 1pt solid;\n",
|
| 501 |
+
" color: var(--sklearn-color-fitted-level-3);\n",
|
| 502 |
+
"}\n",
|
| 503 |
+
"\n",
|
| 504 |
+
"/* On hover */\n",
|
| 505 |
+
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
|
| 506 |
+
".sk-estimator-doc-link:hover,\n",
|
| 507 |
+
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
|
| 508 |
+
".sk-estimator-doc-link:hover {\n",
|
| 509 |
+
" /* unfitted */\n",
|
| 510 |
+
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
| 511 |
+
" border: var(--sklearn-color-fitted-level-0) 1pt solid;\n",
|
| 512 |
+
" color: var(--sklearn-color-unfitted-level-0);\n",
|
| 513 |
+
" text-decoration: none;\n",
|
| 514 |
+
"}\n",
|
| 515 |
+
"\n",
|
| 516 |
+
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
|
| 517 |
+
".sk-estimator-doc-link.fitted:hover,\n",
|
| 518 |
+
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
|
| 519 |
+
".sk-estimator-doc-link.fitted:hover {\n",
|
| 520 |
+
" /* fitted */\n",
|
| 521 |
+
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
| 522 |
+
" border: var(--sklearn-color-fitted-level-0) 1pt solid;\n",
|
| 523 |
+
" color: var(--sklearn-color-fitted-level-0);\n",
|
| 524 |
+
" text-decoration: none;\n",
|
| 525 |
+
"}\n",
|
| 526 |
+
"\n",
|
| 527 |
+
"/* Span, style for the box shown on hovering the info icon */\n",
|
| 528 |
+
".sk-estimator-doc-link span {\n",
|
| 529 |
+
" display: none;\n",
|
| 530 |
+
" z-index: 9999;\n",
|
| 531 |
+
" position: relative;\n",
|
| 532 |
+
" font-weight: normal;\n",
|
| 533 |
+
" right: .2ex;\n",
|
| 534 |
+
" padding: .5ex;\n",
|
| 535 |
+
" margin: .5ex;\n",
|
| 536 |
+
" width: min-content;\n",
|
| 537 |
+
" min-width: 20ex;\n",
|
| 538 |
+
" max-width: 50ex;\n",
|
| 539 |
+
" color: var(--sklearn-color-text);\n",
|
| 540 |
+
" box-shadow: 2pt 2pt 4pt #999;\n",
|
| 541 |
+
" /* unfitted */\n",
|
| 542 |
+
" background: var(--sklearn-color-unfitted-level-0);\n",
|
| 543 |
+
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
|
| 544 |
+
"}\n",
|
| 545 |
+
"\n",
|
| 546 |
+
".sk-estimator-doc-link.fitted span {\n",
|
| 547 |
+
" /* fitted */\n",
|
| 548 |
+
" background: var(--sklearn-color-fitted-level-0);\n",
|
| 549 |
+
" border: var(--sklearn-color-fitted-level-3);\n",
|
| 550 |
+
"}\n",
|
| 551 |
+
"\n",
|
| 552 |
+
".sk-estimator-doc-link:hover span {\n",
|
| 553 |
+
" display: block;\n",
|
| 554 |
+
"}\n",
|
| 555 |
+
"\n",
|
| 556 |
+
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
|
| 557 |
+
"\n",
|
| 558 |
+
"#sk-container-id-4 a.estimator_doc_link {\n",
|
| 559 |
+
" float: right;\n",
|
| 560 |
+
" font-size: 1rem;\n",
|
| 561 |
+
" line-height: 1em;\n",
|
| 562 |
+
" font-family: monospace;\n",
|
| 563 |
+
" background-color: var(--sklearn-color-unfitted-level-0);\n",
|
| 564 |
+
" border-radius: 1rem;\n",
|
| 565 |
+
" height: 1rem;\n",
|
| 566 |
+
" width: 1rem;\n",
|
| 567 |
+
" text-decoration: none;\n",
|
| 568 |
+
" /* unfitted */\n",
|
| 569 |
+
" color: var(--sklearn-color-unfitted-level-1);\n",
|
| 570 |
+
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
|
| 571 |
+
"}\n",
|
| 572 |
+
"\n",
|
| 573 |
+
"#sk-container-id-4 a.estimator_doc_link.fitted {\n",
|
| 574 |
+
" /* fitted */\n",
|
| 575 |
+
" background-color: var(--sklearn-color-fitted-level-0);\n",
|
| 576 |
+
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
|
| 577 |
+
" color: var(--sklearn-color-fitted-level-1);\n",
|
| 578 |
+
"}\n",
|
| 579 |
+
"\n",
|
| 580 |
+
"/* On hover */\n",
|
| 581 |
+
"#sk-container-id-4 a.estimator_doc_link:hover {\n",
|
| 582 |
+
" /* unfitted */\n",
|
| 583 |
+
" background-color: var(--sklearn-color-unfitted-level-3);\n",
|
| 584 |
+
" color: var(--sklearn-color-background);\n",
|
| 585 |
+
" text-decoration: none;\n",
|
| 586 |
+
"}\n",
|
| 587 |
+
"\n",
|
| 588 |
+
"#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
|
| 589 |
+
" /* fitted */\n",
|
| 590 |
+
" background-color: var(--sklearn-color-fitted-level-3);\n",
|
| 591 |
+
"}\n",
|
| 592 |
+
"\n",
|
| 593 |
+
".estimator-table {\n",
|
| 594 |
+
" font-family: monospace;\n",
|
| 595 |
+
"}\n",
|
| 596 |
+
"\n",
|
| 597 |
+
".estimator-table summary {\n",
|
| 598 |
+
" padding: .5rem;\n",
|
| 599 |
+
" cursor: pointer;\n",
|
| 600 |
+
"}\n",
|
| 601 |
+
"\n",
|
| 602 |
+
".estimator-table summary::marker {\n",
|
| 603 |
+
" font-size: 0.7rem;\n",
|
| 604 |
+
"}\n",
|
| 605 |
+
"\n",
|
| 606 |
+
".estimator-table details[open] {\n",
|
| 607 |
+
" padding-left: 0.1rem;\n",
|
| 608 |
+
" padding-right: 0.1rem;\n",
|
| 609 |
+
" padding-bottom: 0.3rem;\n",
|
| 610 |
+
"}\n",
|
| 611 |
+
"\n",
|
| 612 |
+
".estimator-table .parameters-table {\n",
|
| 613 |
+
" margin-left: auto !important;\n",
|
| 614 |
+
" margin-right: auto !important;\n",
|
| 615 |
+
" margin-top: 0;\n",
|
| 616 |
+
"}\n",
|
| 617 |
+
"\n",
|
| 618 |
+
".estimator-table .parameters-table tr:nth-child(odd) {\n",
|
| 619 |
+
" background-color: #fff;\n",
|
| 620 |
+
"}\n",
|
| 621 |
+
"\n",
|
| 622 |
+
".estimator-table .parameters-table tr:nth-child(even) {\n",
|
| 623 |
+
" background-color: #f6f6f6;\n",
|
| 624 |
+
"}\n",
|
| 625 |
+
"\n",
|
| 626 |
+
".estimator-table .parameters-table tr:hover {\n",
|
| 627 |
+
" background-color: #e0e0e0;\n",
|
| 628 |
+
"}\n",
|
| 629 |
+
"\n",
|
| 630 |
+
".estimator-table table td {\n",
|
| 631 |
+
" border: 1px solid rgba(106, 105, 104, 0.232);\n",
|
| 632 |
+
"}\n",
|
| 633 |
+
"\n",
|
| 634 |
+
"/*\n",
|
| 635 |
+
" `table td`is set in notebook with right text-align.\n",
|
| 636 |
+
" We need to overwrite it.\n",
|
| 637 |
+
"*/\n",
|
| 638 |
+
".estimator-table table td.param {\n",
|
| 639 |
+
" text-align: left;\n",
|
| 640 |
+
" position: relative;\n",
|
| 641 |
+
" padding: 0;\n",
|
| 642 |
+
"}\n",
|
| 643 |
+
"\n",
|
| 644 |
+
".user-set td {\n",
|
| 645 |
+
" color:rgb(255, 94, 0);\n",
|
| 646 |
+
" text-align: left !important;\n",
|
| 647 |
+
"}\n",
|
| 648 |
+
"\n",
|
| 649 |
+
".user-set td.value {\n",
|
| 650 |
+
" color:rgb(255, 94, 0);\n",
|
| 651 |
+
" background-color: transparent;\n",
|
| 652 |
+
"}\n",
|
| 653 |
+
"\n",
|
| 654 |
+
".default td {\n",
|
| 655 |
+
" color: black;\n",
|
| 656 |
+
" text-align: left !important;\n",
|
| 657 |
+
"}\n",
|
| 658 |
+
"\n",
|
| 659 |
+
".user-set td i,\n",
|
| 660 |
+
".default td i {\n",
|
| 661 |
+
" color: black;\n",
|
| 662 |
+
"}\n",
|
| 663 |
+
"\n",
|
| 664 |
+
"/*\n",
|
| 665 |
+
" Styles for parameter documentation links\n",
|
| 666 |
+
" We need styling for visited so jupyter doesn't overwrite it\n",
|
| 667 |
+
"*/\n",
|
| 668 |
+
"a.param-doc-link,\n",
|
| 669 |
+
"a.param-doc-link:link,\n",
|
| 670 |
+
"a.param-doc-link:visited {\n",
|
| 671 |
+
" text-decoration: underline dashed;\n",
|
| 672 |
+
" text-underline-offset: .3em;\n",
|
| 673 |
+
" color: inherit;\n",
|
| 674 |
+
" display: block;\n",
|
| 675 |
+
" padding: .5em;\n",
|
| 676 |
+
"}\n",
|
| 677 |
+
"\n",
|
| 678 |
+
"/* \"hack\" to make the entire area of the cell containing the link clickable */\n",
|
| 679 |
+
"a.param-doc-link::before {\n",
|
| 680 |
+
" position: absolute;\n",
|
| 681 |
+
" content: \"\";\n",
|
| 682 |
+
" inset: 0;\n",
|
| 683 |
+
"}\n",
|
| 684 |
+
"\n",
|
| 685 |
+
".param-doc-description {\n",
|
| 686 |
+
" display: none;\n",
|
| 687 |
+
" position: absolute;\n",
|
| 688 |
+
" z-index: 9999;\n",
|
| 689 |
+
" left: 0;\n",
|
| 690 |
+
" padding: .5ex;\n",
|
| 691 |
+
" margin-left: 1.5em;\n",
|
| 692 |
+
" color: var(--sklearn-color-text);\n",
|
| 693 |
+
" box-shadow: .3em .3em .4em #999;\n",
|
| 694 |
+
" width: max-content;\n",
|
| 695 |
+
" text-align: left;\n",
|
| 696 |
+
" max-height: 10em;\n",
|
| 697 |
+
" overflow-y: auto;\n",
|
| 698 |
+
"\n",
|
| 699 |
+
" /* unfitted */\n",
|
| 700 |
+
" background: var(--sklearn-color-unfitted-level-0);\n",
|
| 701 |
+
" border: thin solid var(--sklearn-color-unfitted-level-3);\n",
|
| 702 |
+
"}\n",
|
| 703 |
+
"\n",
|
| 704 |
+
"/* Fitted state for parameter tooltips */\n",
|
| 705 |
+
".fitted .param-doc-description {\n",
|
| 706 |
+
" /* fitted */\n",
|
| 707 |
+
" background: var(--sklearn-color-fitted-level-0);\n",
|
| 708 |
+
" border: thin solid var(--sklearn-color-fitted-level-3);\n",
|
| 709 |
+
"}\n",
|
| 710 |
+
"\n",
|
| 711 |
+
".param-doc-link:hover .param-doc-description {\n",
|
| 712 |
+
" display: block;\n",
|
| 713 |
+
"}\n",
|
| 714 |
+
"\n",
|
| 715 |
+
".copy-paste-icon {\n",
|
| 716 |
+
" background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);\n",
|
| 717 |
+
" background-repeat: no-repeat;\n",
|
| 718 |
+
" background-size: 14px 14px;\n",
|
| 719 |
+
" background-position: 0;\n",
|
| 720 |
+
" display: inline-block;\n",
|
| 721 |
+
" width: 14px;\n",
|
| 722 |
+
" height: 14px;\n",
|
| 723 |
+
" cursor: pointer;\n",
|
| 724 |
+
"}\n",
|
| 725 |
+
"</style><body><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Ridge(alpha=2.0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>Ridge</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html\">?<span>Documentation for Ridge</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\" data-param-prefix=\"\">\n",
|
| 726 |
+
" <div class=\"estimator-table\">\n",
|
| 727 |
+
" <details>\n",
|
| 728 |
+
" <summary>Parameters</summary>\n",
|
| 729 |
+
" <table class=\"parameters-table\">\n",
|
| 730 |
+
" <tbody>\n",
|
| 731 |
+
" \n",
|
| 732 |
+
" <tr class=\"user-set\">\n",
|
| 733 |
+
" <td><i class=\"copy-paste-icon\"\n",
|
| 734 |
+
" onclick=\"copyToClipboard('alpha',\n",
|
| 735 |
+
" this.parentElement.nextElementSibling)\"\n",
|
| 736 |
+
" ></i></td>\n",
|
| 737 |
+
" <td class=\"param\">\n",
|
| 738 |
+
" <a class=\"param-doc-link\"\n",
|
| 739 |
+
" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=alpha,-%7Bfloat%2C%20ndarray%20of%20shape%20%28n_targets%2C%29%7D%2C%20default%3D1.0\">\n",
|
| 740 |
+
" alpha\n",
|
| 741 |
+
" <span class=\"param-doc-description\">alpha: {float, ndarray of shape (n_targets,)}, default=1.0<br><br>Constant that multiplies the L2 term, controlling regularization<br>strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.<br><br>When `alpha = 0`, the objective is equivalent to ordinary least<br>squares, solved by the :class:`LinearRegression` object. For numerical<br>reasons, using `alpha = 0` with the `Ridge` object is not advised.<br>Instead, you should use the :class:`LinearRegression` object.<br><br>If an array is passed, penalties are assumed to be specific to the<br>targets. Hence they must correspond in number.</span>\n",
|
| 742 |
+
" </a>\n",
|
| 743 |
+
" </td>\n",
|
| 744 |
+
" <td class=\"value\">2.0</td>\n",
|
| 745 |
+
" </tr>\n",
|
| 746 |
+
" \n",
|
| 747 |
+
"\n",
|
| 748 |
+
" <tr class=\"default\">\n",
|
| 749 |
+
" <td><i class=\"copy-paste-icon\"\n",
|
| 750 |
+
" onclick=\"copyToClipboard('fit_intercept',\n",
|
| 751 |
+
" this.parentElement.nextElementSibling)\"\n",
|
| 752 |
+
" ></i></td>\n",
|
| 753 |
+
" <td class=\"param\">\n",
|
| 754 |
+
" <a class=\"param-doc-link\"\n",
|
| 755 |
+
" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=fit_intercept,-bool%2C%20default%3DTrue\">\n",
|
| 756 |
+
" fit_intercept\n",
|
| 757 |
+
" <span class=\"param-doc-description\">fit_intercept: bool, default=True<br><br>Whether to fit the intercept for this model. If set<br>to false, no intercept will be used in calculations<br>(i.e. ``X`` and ``y`` are expected to be centered).</span>\n",
|
| 758 |
+
" </a>\n",
|
| 759 |
+
" </td>\n",
|
| 760 |
+
" <td class=\"value\">True</td>\n",
|
| 761 |
+
" </tr>\n",
|
| 762 |
+
" \n",
|
| 763 |
+
"\n",
|
| 764 |
+
" <tr class=\"default\">\n",
|
| 765 |
+
" <td><i class=\"copy-paste-icon\"\n",
|
| 766 |
+
" onclick=\"copyToClipboard('copy_X',\n",
|
| 767 |
+
" this.parentElement.nextElementSibling)\"\n",
|
| 768 |
+
" ></i></td>\n",
|
| 769 |
+
" <td class=\"param\">\n",
|
| 770 |
+
" <a class=\"param-doc-link\"\n",
|
| 771 |
+
" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=copy_X,-bool%2C%20default%3DTrue\">\n",
|
| 772 |
+
" copy_X\n",
|
| 773 |
+
" <span class=\"param-doc-description\">copy_X: bool, default=True<br><br>If True, X will be copied; else, it may be overwritten.</span>\n",
|
| 774 |
+
" </a>\n",
|
| 775 |
+
" </td>\n",
|
| 776 |
+
" <td class=\"value\">True</td>\n",
|
| 777 |
+
" </tr>\n",
|
| 778 |
+
" \n",
|
| 779 |
+
"\n",
|
| 780 |
+
" <tr class=\"default\">\n",
|
| 781 |
+
" <td><i class=\"copy-paste-icon\"\n",
|
| 782 |
+
" onclick=\"copyToClipboard('max_iter',\n",
|
| 783 |
+
" this.parentElement.nextElementSibling)\"\n",
|
| 784 |
+
" ></i></td>\n",
|
| 785 |
+
" <td class=\"param\">\n",
|
| 786 |
+
" <a class=\"param-doc-link\"\n",
|
| 787 |
+
" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=max_iter,-int%2C%20default%3DNone\">\n",
|
| 788 |
+
" max_iter\n",
|
| 789 |
+
" <span class=\"param-doc-description\">max_iter: int, default=None<br><br>Maximum number of iterations for conjugate gradient solver.<br>For 'sparse_cg' and 'lsqr' solvers, the default value is determined<br>by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.<br>For 'lbfgs' solver, the default value is 15000.</span>\n",
|
| 790 |
+
" </a>\n",
|
| 791 |
+
" </td>\n",
|
| 792 |
+
" <td class=\"value\">None</td>\n",
|
| 793 |
+
" </tr>\n",
|
| 794 |
+
" \n",
|
| 795 |
+
"\n",
|
| 796 |
+
" <tr class=\"default\">\n",
|
| 797 |
+
" <td><i class=\"copy-paste-icon\"\n",
|
| 798 |
+
" onclick=\"copyToClipboard('tol',\n",
|
| 799 |
+
" this.parentElement.nextElementSibling)\"\n",
|
| 800 |
+
" ></i></td>\n",
|
| 801 |
+
" <td class=\"param\">\n",
|
| 802 |
+
" <a class=\"param-doc-link\"\n",
|
| 803 |
+
" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=tol,-float%2C%20default%3D1e-4\">\n",
|
| 804 |
+
" tol\n",
|
| 805 |
+
" <span class=\"param-doc-description\">tol: float, default=1e-4<br><br>The precision of the solution (`coef_`) is determined by `tol` which<br>specifies a different convergence criterion for each solver:<br><br>- 'svd': `tol` has no impact.<br><br>- 'cholesky': `tol` has no impact.<br><br>- 'sparse_cg': norm of residuals smaller than `tol`.<br><br>- 'lsqr': `tol` is set as atol and btol of scipy.sparse.linalg.lsqr,<br> which control the norm of the residual vector in terms of the norms of<br> matrix and coefficients.<br><br>- 'sag' and 'saga': relative change of coef smaller than `tol`.<br><br>- 'lbfgs': maximum of the absolute (projected) gradient=max|residuals|<br> smaller than `tol`.<br><br>.. versionchanged:: 1.2<br> Default value changed from 1e-3 to 1e-4 for consistency with other linear<br> models.</span>\n",
|
| 806 |
+
" </a>\n",
|
| 807 |
+
" </td>\n",
|
| 808 |
+
" <td class=\"value\">0.0001</td>\n",
|
| 809 |
+
" </tr>\n",
|
| 810 |
+
" \n",
|
| 811 |
+
"\n",
|
| 812 |
+
" <tr class=\"default\">\n",
|
| 813 |
+
" <td><i class=\"copy-paste-icon\"\n",
|
| 814 |
+
" onclick=\"copyToClipboard('solver',\n",
|
| 815 |
+
" this.parentElement.nextElementSibling)\"\n",
|
| 816 |
+
" ></i></td>\n",
|
| 817 |
+
" <td class=\"param\">\n",
|
| 818 |
+
" <a class=\"param-doc-link\"\n",
|
| 819 |
+
" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=solver,-%7B%27auto%27%2C%20%27svd%27%2C%20%27cholesky%27%2C%20%27lsqr%27%2C%20%27sparse_cg%27%2C%20%20%20%20%20%20%20%20%20%20%20%20%20%27sag%27%2C%20%27saga%27%2C%20%27lbfgs%27%7D%2C%20default%3D%27auto%27\">\n",
|
| 820 |
+
" solver\n",
|
| 821 |
+
" <span class=\"param-doc-description\">solver: {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'}, default='auto'<br><br>Solver to use in the computational routines:<br><br>- 'auto' chooses the solver automatically based on the type of data.<br><br>- 'svd' uses a Singular Value Decomposition of X to compute the Ridge<br> coefficients. It is the most stable solver, in particular more stable<br> for singular matrices than 'cholesky' at the cost of being slower.<br><br>- 'cholesky' uses the standard :func:`scipy.linalg.solve` function to<br> obtain a closed-form solution.<br><br>- 'sparse_cg' uses the conjugate gradient solver as found in<br> :func:`scipy.sparse.linalg.cg`. As an iterative algorithm, this solver is<br> more appropriate than 'cholesky' for large-scale data<br> (possibility to set `tol` and `max_iter`).<br><br>- 'lsqr' uses the dedicated regularized least-squares routine<br> :func:`scipy.sparse.linalg.lsqr`. It is the fastest and uses an iterative<br> procedure.<br><br>- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses<br> its improved, unbiased version named SAGA. Both methods also use an<br> iterative procedure, and are often faster than other solvers when<br> both n_samples and n_features are large. Note that 'sag' and<br> 'saga' fast convergence is only guaranteed on features with<br> approximately the same scale. You can preprocess the data with a<br> scaler from :mod:`sklearn.preprocessing`.<br><br>- 'lbfgs' uses L-BFGS-B algorithm implemented in<br> :func:`scipy.optimize.minimize`. It can be used only when `positive`<br> is True.<br><br>All solvers except 'svd' support both dense and sparse data. However, only<br>'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when<br>`fit_intercept` is True.<br><br>.. versionadded:: 0.17<br> Stochastic Average Gradient descent solver.<br>.. versionadded:: 0.19<br> SAGA solver.</span>\n",
|
| 822 |
+
" </a>\n",
|
| 823 |
+
" </td>\n",
|
| 824 |
+
" <td class=\"value\">'auto'</td>\n",
|
| 825 |
+
" </tr>\n",
|
| 826 |
+
" \n",
|
| 827 |
+
"\n",
|
| 828 |
+
" <tr class=\"default\">\n",
|
| 829 |
+
" <td><i class=\"copy-paste-icon\"\n",
|
| 830 |
+
" onclick=\"copyToClipboard('positive',\n",
|
| 831 |
+
" this.parentElement.nextElementSibling)\"\n",
|
| 832 |
+
" ></i></td>\n",
|
| 833 |
+
" <td class=\"param\">\n",
|
| 834 |
+
" <a class=\"param-doc-link\"\n",
|
| 835 |
+
" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=positive,-bool%2C%20default%3DFalse\">\n",
|
| 836 |
+
" positive\n",
|
| 837 |
+
" <span class=\"param-doc-description\">positive: bool, default=False<br><br>When set to ``True``, forces the coefficients to be positive.<br>Only 'lbfgs' solver is supported in this case.</span>\n",
|
| 838 |
+
" </a>\n",
|
| 839 |
+
" </td>\n",
|
| 840 |
+
" <td class=\"value\">False</td>\n",
|
| 841 |
+
" </tr>\n",
|
| 842 |
+
" \n",
|
| 843 |
+
"\n",
|
| 844 |
+
" <tr class=\"default\">\n",
|
| 845 |
+
" <td><i class=\"copy-paste-icon\"\n",
|
| 846 |
+
" onclick=\"copyToClipboard('random_state',\n",
|
| 847 |
+
" this.parentElement.nextElementSibling)\"\n",
|
| 848 |
+
" ></i></td>\n",
|
| 849 |
+
" <td class=\"param\">\n",
|
| 850 |
+
" <a class=\"param-doc-link\"\n",
|
| 851 |
+
" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=random_state,-int%2C%20RandomState%20instance%2C%20default%3DNone\">\n",
|
| 852 |
+
" random_state\n",
|
| 853 |
+
" <span class=\"param-doc-description\">random_state: int, RandomState instance, default=None<br><br>Used when ``solver`` == 'sag' or 'saga' to shuffle the data.<br>See :term:`Glossary <random_state>` for details.<br><br>.. versionadded:: 0.17<br> `random_state` to support Stochastic Average Gradient.</span>\n",
|
| 854 |
+
" </a>\n",
|
| 855 |
+
" </td>\n",
|
| 856 |
+
" <td class=\"value\">None</td>\n",
|
| 857 |
+
" </tr>\n",
|
| 858 |
+
" \n",
|
| 859 |
+
" </tbody>\n",
|
| 860 |
+
" </table>\n",
|
| 861 |
+
" </details>\n",
|
| 862 |
+
" </div>\n",
|
| 863 |
+
" </div></div></div></div></div><script>function copyToClipboard(text, element) {\n",
|
| 864 |
+
" // Get the parameter prefix from the closest toggleable content\n",
|
| 865 |
+
" const toggleableContent = element.closest('.sk-toggleable__content');\n",
|
| 866 |
+
" const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';\n",
|
| 867 |
+
" const fullParamName = paramPrefix ? `${paramPrefix}${text}` : text;\n",
|
| 868 |
+
"\n",
|
| 869 |
+
" const originalStyle = element.style;\n",
|
| 870 |
+
" const computedStyle = window.getComputedStyle(element);\n",
|
| 871 |
+
" const originalWidth = computedStyle.width;\n",
|
| 872 |
+
" const originalHTML = element.innerHTML.replace('Copied!', '');\n",
|
| 873 |
+
"\n",
|
| 874 |
+
" navigator.clipboard.writeText(fullParamName)\n",
|
| 875 |
+
" .then(() => {\n",
|
| 876 |
+
" element.style.width = originalWidth;\n",
|
| 877 |
+
" element.style.color = 'green';\n",
|
| 878 |
+
" element.innerHTML = \"Copied!\";\n",
|
| 879 |
+
"\n",
|
| 880 |
+
" setTimeout(() => {\n",
|
| 881 |
+
" element.innerHTML = originalHTML;\n",
|
| 882 |
+
" element.style = originalStyle;\n",
|
| 883 |
+
" }, 2000);\n",
|
| 884 |
+
" })\n",
|
| 885 |
+
" .catch(err => {\n",
|
| 886 |
+
" console.error('Failed to copy:', err);\n",
|
| 887 |
+
" element.style.color = 'red';\n",
|
| 888 |
+
" element.innerHTML = \"Failed!\";\n",
|
| 889 |
+
" setTimeout(() => {\n",
|
| 890 |
+
" element.innerHTML = originalHTML;\n",
|
| 891 |
+
" element.style = originalStyle;\n",
|
| 892 |
+
" }, 2000);\n",
|
| 893 |
+
" });\n",
|
| 894 |
+
" return false;\n",
|
| 895 |
+
"}\n",
|
| 896 |
+
"\n",
|
| 897 |
+
"document.querySelectorAll('.copy-paste-icon').forEach(function(element) {\n",
|
| 898 |
+
" const toggleableContent = element.closest('.sk-toggleable__content');\n",
|
| 899 |
+
" const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';\n",
|
| 900 |
+
" const paramName = element.parentElement.nextElementSibling\n",
|
| 901 |
+
" .textContent.trim().split(' ')[0];\n",
|
| 902 |
+
" const fullParamName = paramPrefix ? `${paramPrefix}${paramName}` : paramName;\n",
|
| 903 |
+
"\n",
|
| 904 |
+
" element.setAttribute('title', fullParamName);\n",
|
| 905 |
+
"});\n",
|
| 906 |
+
"\n",
|
| 907 |
+
"\n",
|
| 908 |
+
"/**\n",
|
| 909 |
+
" * Adapted from Skrub\n",
|
| 910 |
+
" * https://github.com/skrub-data/skrub/blob/403466d1d5d4dc76a7ef569b3f8228db59a31dc3/skrub/_reporting/_data/templates/report.js#L789\n",
|
| 911 |
+
" * @returns \"light\" or \"dark\"\n",
|
| 912 |
+
" */\n",
|
| 913 |
+
"function detectTheme(element) {\n",
|
| 914 |
+
" const body = document.querySelector('body');\n",
|
| 915 |
+
"\n",
|
| 916 |
+
" // Check VSCode theme\n",
|
| 917 |
+
" const themeKindAttr = body.getAttribute('data-vscode-theme-kind');\n",
|
| 918 |
+
" const themeNameAttr = body.getAttribute('data-vscode-theme-name');\n",
|
| 919 |
+
"\n",
|
| 920 |
+
" if (themeKindAttr && themeNameAttr) {\n",
|
| 921 |
+
" const themeKind = themeKindAttr.toLowerCase();\n",
|
| 922 |
+
" const themeName = themeNameAttr.toLowerCase();\n",
|
| 923 |
+
"\n",
|
| 924 |
+
" if (themeKind.includes(\"dark\") || themeName.includes(\"dark\")) {\n",
|
| 925 |
+
" return \"dark\";\n",
|
| 926 |
+
" }\n",
|
| 927 |
+
" if (themeKind.includes(\"light\") || themeName.includes(\"light\")) {\n",
|
| 928 |
+
" return \"light\";\n",
|
| 929 |
+
" }\n",
|
| 930 |
+
" }\n",
|
| 931 |
+
"\n",
|
| 932 |
+
" // Check Jupyter theme\n",
|
| 933 |
+
" if (body.getAttribute('data-jp-theme-light') === 'false') {\n",
|
| 934 |
+
" return 'dark';\n",
|
| 935 |
+
" } else if (body.getAttribute('data-jp-theme-light') === 'true') {\n",
|
| 936 |
+
" return 'light';\n",
|
| 937 |
+
" }\n",
|
| 938 |
+
"\n",
|
| 939 |
+
" // Guess based on a parent element's color\n",
|
| 940 |
+
" const color = window.getComputedStyle(element.parentNode, null).getPropertyValue('color');\n",
|
| 941 |
+
" const match = color.match(/^rgb\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)\\s*$/i);\n",
|
| 942 |
+
" if (match) {\n",
|
| 943 |
+
" const [r, g, b] = [\n",
|
| 944 |
+
" parseFloat(match[1]),\n",
|
| 945 |
+
" parseFloat(match[2]),\n",
|
| 946 |
+
" parseFloat(match[3])\n",
|
| 947 |
+
" ];\n",
|
| 948 |
+
"\n",
|
| 949 |
+
" // https://en.wikipedia.org/wiki/HSL_and_HSV#Lightness\n",
|
| 950 |
+
" const luma = 0.299 * r + 0.587 * g + 0.114 * b;\n",
|
| 951 |
+
"\n",
|
| 952 |
+
" if (luma > 180) {\n",
|
| 953 |
+
" // If the text is very bright we have a dark theme\n",
|
| 954 |
+
" return 'dark';\n",
|
| 955 |
+
" }\n",
|
| 956 |
+
" if (luma < 75) {\n",
|
| 957 |
+
" // If the text is very dark we have a light theme\n",
|
| 958 |
+
" return 'light';\n",
|
| 959 |
+
" }\n",
|
| 960 |
+
" // Otherwise fall back to the next heuristic.\n",
|
| 961 |
+
" }\n",
|
| 962 |
+
"\n",
|
| 963 |
+
" // Fallback to system preference\n",
|
| 964 |
+
" return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';\n",
|
| 965 |
+
"}\n",
|
| 966 |
+
"\n",
|
| 967 |
+
"\n",
|
| 968 |
+
"function forceTheme(elementId) {\n",
|
| 969 |
+
" const estimatorElement = document.querySelector(`#${elementId}`);\n",
|
| 970 |
+
" if (estimatorElement === null) {\n",
|
| 971 |
+
" console.error(`Element with id ${elementId} not found.`);\n",
|
| 972 |
+
" } else {\n",
|
| 973 |
+
" const theme = detectTheme(estimatorElement);\n",
|
| 974 |
+
" estimatorElement.classList.add(theme);\n",
|
| 975 |
+
" }\n",
|
| 976 |
+
"}\n",
|
| 977 |
+
"\n",
|
| 978 |
+
"forceTheme('sk-container-id-4');</script></body>"
|
| 979 |
+
],
|
| 980 |
+
"text/plain": [
|
| 981 |
+
"Ridge(alpha=2.0)"
|
| 982 |
+
]
|
| 983 |
+
},
|
| 984 |
+
"execution_count": 17,
|
| 985 |
+
"metadata": {},
|
| 986 |
+
"output_type": "execute_result"
|
| 987 |
+
}
|
| 988 |
+
],
|
| 989 |
+
"source": [
|
| 990 |
+
"#creation of the model\n",
|
| 991 |
+
"model = LinearRegression()\n",
|
| 992 |
+
"model.fit(x_train,y_train)"
|
| 993 |
+
]
|
| 994 |
+
},
|
| 995 |
+
{
|
| 996 |
+
"cell_type": "code",
|
| 997 |
+
"execution_count": 18,
|
| 998 |
+
"id": "8ccc5166-fb20-44a7-95bc-925e5b3c8f4d",
|
| 999 |
+
"metadata": {},
|
| 1000 |
+
"outputs": [
|
| 1001 |
+
{
|
| 1002 |
+
"name": "stdout",
|
| 1003 |
+
"output_type": "stream",
|
| 1004 |
+
"text": [
|
| 1005 |
+
"la valeur de a est : [0.9314094]\n",
|
| 1006 |
+
"la valeur de b est -11.480004226844926\n"
|
| 1007 |
+
]
|
| 1008 |
+
}
|
| 1009 |
+
],
|
| 1010 |
+
"source": [
|
| 1011 |
+
"#y=ax+b\n",
|
| 1012 |
+
"print('la valeur de a est :',model.coef_)\n",
|
| 1013 |
+
"print('la valeur de b est ',model.intercept_)"
|
| 1014 |
+
]
|
| 1015 |
+
},
|
| 1016 |
+
{
|
| 1017 |
+
"cell_type": "markdown",
|
| 1018 |
+
"id": "93239f43-5f1f-4a35-9479-0b9de2752743",
|
| 1019 |
+
"metadata": {},
|
| 1020 |
+
"source": [
|
| 1021 |
+
"### 🧠 Interprétation des Résultats\n",
|
| 1022 |
+
"\n",
|
| 1023 |
+
"Voici ce que les mathématiques racontent sur la réalité de nos écoles :\n",
|
| 1024 |
+
"\n",
|
| 1025 |
+
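"Mon modèle a trouvé l'équation suivante (attention : ces valeurs proviennent d'une exécution antérieure ; la sortie enregistrée de la cellule précédente affiche a ≈ 0.931 et b ≈ -11.48 — à mettre à jour après ré-exécution du notebook) :\n",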
|
| 1026 |
+
"$$Absents = 0.066 \\times Inscrits + 11.12$$\n",
|
| 1027 |
+
"\n",
|
| 1028 |
+
"**1. Le Coefficient ($a \\approx 0.066$) : Le Taux d'Absentéisme**\n",
|
| 1029 |
+
"* C'est la pente de la droite.\n",
|
| 1030 |
+
"* Cela signifie que **pour chaque nouvel élève inscrit**, le nombre d'absents augmente d'environ **0.066**.\n",
|
| 1031 |
+
"* *En clair :* Sur un groupe de 100 élèves, on peut s'attendre statistiquement à ce qu'environ **6 ou 7** soient absents ($100 \\times 0.066 = 6.6$).\n",
|
| 1032 |
+
"\n",
|
| 1033 |
+
"**2. L'Intercept ($b \\approx 11.12$) : Le Calibrage**\n",
|
| 1034 |
+
"* C'est l'ordonnée à l'origine (le point de départ de la droite).\n",
|
| 1035 |
+
"* Théoriquement, cela voudrait dire qu'une école avec **0 élève** aurait quand même **11 absents**.\n",
|
| 1036 |
+
"* *En réalité :* C'est physiquement impossible, mais c'est un ajustement mathématique nécessaire pour que la ligne droite passe au mieux au milieu du nuage de points."
|
| 1037 |
+
]
|
| 1038 |
+
},
|
| 1039 |
+
{
|
| 1040 |
+
"cell_type": "code",
|
| 1041 |
+
"execution_count": 19,
|
| 1042 |
+
"id": "84752c5b-601b-4923-9774-6c5d05d0a212",
|
| 1043 |
+
"metadata": {},
|
| 1044 |
+
"outputs": [
|
| 1045 |
+
{
|
| 1046 |
+
"name": "stdout",
|
| 1047 |
+
"output_type": "stream",
|
| 1048 |
+
"text": [
|
| 1049 |
+
"[ 221.37234507 183.18455978 497.06952663 ... 379.71194259 384.36898957\n",
|
| 1050 |
+
" 1263.6194605 ]\n"
|
| 1051 |
+
]
|
| 1052 |
+
}
|
| 1053 |
+
],
|
| 1054 |
+
"source": [
|
| 1055 |
+
"#prediction\n",
|
| 1056 |
+
"predict = model.predict(x_test)\n",
|
| 1057 |
+
"print(predict)\n"
|
| 1058 |
+
]
|
| 1059 |
+
},
|
| 1060 |
+
{
|
| 1061 |
+
"cell_type": "code",
|
| 1062 |
+
"execution_count": 23,
|
| 1063 |
+
"id": "4c4cc377-b7a5-4640-baac-097637abfe12",
|
| 1064 |
+
"metadata": {},
|
| 1065 |
+
"outputs": [
|
| 1066 |
+
{
|
| 1067 |
+
"data": {
|
| 1068 |
+
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAiRRJREFUeJzt3XlcVFX/wPHPAIICAoosKoj7vmsZpqlJ4lYZWm6plWW5lLimbWqbZptmaam/R3tKs0xK09RIcTcrU3PLTDFXcAVcQYbz++M+M3JhgBmYYVi+79frvnDOPffec++MzJezGpRSCiGEEEKIUszF2QUQQgghhHA2CYiEEEIIUepJQCSEEEKIUk8CIiGEEEKUehIQCSGEEKLUk4BICCGEEKWeBERCCCGEKPUkIBJCCCFEqScBkdDZsmULr7/+OsnJyc4uiihCVq1axTvvvMPt27edXRQhhHAICYiE2b///kuvXr0oX748vr6+Vh1TvXp1nnjiCfPrTZs2YTAY2LRpk93KZTAYmDp1qt3OV1BPPPEE1atXt9v5pk6disFgsNv57O3333+nX79+1K5dmzJlyji7OIUmIyODxo0b89Zbbzm7KA5h78+xo61btw5vb28uXLhg03EjRozggQcecFCpxD333MPEiROdXQy7kICoGFu8eDEGg8G8lS1blrp16zJq1CgSExNtOtft27fp27cvTzzxBGPGjHFQiXP2448/FqmgpzgzBVimzdPTk4YNG/LKK6+QkpJi07mSkpJ47LHHmDFjBr1793ZQiYumr776ilOnTjFq1CiL++fOnYvBYKBNmzYW9x86dIipU6dy4sQJi8cuXrzYjqUt+bp27Urt2rWZPn261cfEx8ezcOFCXnrpJXPaiRMndP8/XFxcqFixIt26dWPnzp3ZzrFhwwaeeuop6tati6enJzVr1uTpp5/m3LlzdrkvWyUlJTFs2DACAgLw8vKiU6dO/PHHH1Yf//HHH9OgQQM8PDyoWrUqY8eO5fr169nynTt3jmHDhlGjRg3KlStHrVq1GDt2LJcuXdLle/HFF/nkk09ISEgo8L05nRLF1qJFixSgXn/9dfXFF1+oBQsWqCFDhigXFxdVo0YNdf36davPtWfPHvXhhx+qjIwMm8oQFhamhgwZYn5tNBrVzZs3ldFotOk8I0eOVDl9HG/evKlu375t0/kcaciQISosLMxu55syZUqO916Q882bN0998cUXat68eeqRRx5RgAoPD7fpPY6Li1P/93//Z7eyFSfNmjVTw4YNy3F/27ZtVfXq1RWgjh49mm3/8uXLFaDi4uKy7WvUqJHq0KGDHUtrO3t/jgvD3Llzlaenp0pJSbEq/+jRo1XdunV1afHx8QpQ/fv3V1988YVavHixeumll5Sfn5/y8PBQf/75py5/q1atVI0aNdTEiRPVggUL1OTJk1X58uVVUFCQOnfunN3uzRpGo1G1bdtWeXl5qalTp6qPP/5YNWzYUJUvX179/fffeR4/ceJEBag+ffqoefPmqeeff165ubmpLl266PJdvXpVhYWFqUqVKqnXXntNLViwQI0aNUqVKVNGNW/eXPf73Wg0quDgYPXqq6/a/X4LmwRExZgpIPrtt9906WPHjlWAWrp0aY7HXrt2zS5lyBoQ5VduAVFRU1wCogsXLujSo6KiFKB27NiR47G2BNEl2R9//KEA9fPPP1vcf/z4cQWomJgYFRAQoKZOnZotjwRE9peYmKhcXV2tCtLT0tJUpUqV1CuvvKJLNwVE7777ri597dq1ClDDhw/XpW/evDnbH3ibN29WgHr55ZfzeSf58/XXXytALV++3Jx2/vx55efnp/r375/rsWfPnlVubm5q0KBBuvQ5c+YoQK1atcqctmTJEgWo1atX6/K+9tprClB//PGHLn3UqFEqLCzM5j+oixppMiuB7r//fkCrLgatr4C3tzfHjh2je/fulC9fnoEDBwJaP4lZs2bRqF
EjypYtS1BQEM8++yxXrlzRnVMpxZtvvklISAienp506tSJgwcPZrt2Tn2Idu3aRffu3alQoQJeXl40bdqU2bNnm8v3ySefAOiqsk0s9SHas2cP3bp1w8fHB29vbzp37swvv/yiy2NqUty+fTtjx441VzE/8sgjVvdD+P7772ncuDFly5alcePGfPfddxbzWfscrbVo0SLuv/9+AgMD8fDwoGHDhsybNy9f5zLJ+rno2LEjjRs3Zvfu3dx33314enqamxZSU1OZMmUKtWvXxsPDg9DQUCZOnEhqaqrunLGxsbRr1w4/Pz+8vb2pV6+ernnClnMZDAZGjRplfuYeHh40atSIdevWZbuXM2fOMHToUKpUqYKHhwc1atRg+PDhpKWlmfMkJSURHR1NaGgoHh4e1K5dm3feeYeMjIw8n9X333+Pu7s79913n8X9S5YsoUKFCvTo0YM+ffqwZMkS3f7Fixfz6KOPAtCpUyfzZ3rTpk1Ur16dgwcPsnnzZnN6x44dbSq3qdnnvffeY/78+dSqVQsPDw/uuusufvvtN4v3Y8/PcfXq1enZsyfbtm3j7rvvpmzZstSsWZP//ve/2c6ZlJTEmDFjqF69Oh4eHoSEhDB48GAuXrzItWvX8PLyYvTo0dmOO336NK6urromssDAQJo2bcrKlSstlj+zbdu2cfHiRSIiIvLMC9C+fXsAjh07pku/7777cHFxyZZWsWJFDh8+bNW57eXbb78lKCiIqKgoc1pAQACPPfYYK1euzPZ/KrOdO3eSnp5Ov379dOmm18uWLTOnmZrWg4KCdHkrV64MQLly5XTpDzzwAP/++y979+61/aaKEDdnF0DYn+k/tL+/vzktPT2dyMhI2rVrx3vvvYenpycAzz77LIsXL+bJJ5/khRdeID4+no8//pg9e/awfft2cyfa1157jTfffJPu3bvTvXt3/vjjD7p06aL7AspJbGwsPXv2pHLlyowePZrg4GAOHz7M6tWrGT16NM8++yxnz54lNjaWL774Is/zHTx4kPbt2+Pj48PEiRMpU6YMn332GR07dmTz5s3Z+nQ8//zzVKhQgSlTpnDixAlmzZrFqFGj+Prrr3O9zk8//UTv3r1p2LAh06dP59KlSzz55JOEhIRky2vtc7TWvHnzaNSoEQ899BBubm788MMPjBgxgoyMDEaOHGnTuUwsfS4uXbpEt27d6NevH48//jhBQUFkZGTw0EMPsW3bNoYNG0aDBg3Yv38/H374IX///Tfff/89oL0PPXv2pGnTprz++ut4eHjwzz//sH37dvP5rT2XybZt24iJiWHEiBGUL1+ejz76iN69e3Py5Elzuc+ePcvdd99t7ktRv359zpw5w7fffsuNGzdwd3fnxo0bdOjQgTNnzvDss89SrVo1duzYweTJkzl37hyzZs3K9Vnt2LGDxo0b5/i+LVmyhKioKNzd3enfvz/z5s3jt99+46677gK0L8wXXniBjz76iJdeeokGDRoA0KBBA2bNmsXzzz+Pt7c3L7/8MnDni8fWci9dupSrV6/y7LPPYjAYmDlzJlFRURw/ftxcdkd9jv/55x/69OnD0KFDGTJkCP/5z3944oknaNWqFY0aNQLg2rVrtG/fnsOHD/PUU0/RsmVLLl68yKpVqzh9+jTNmzfnkUce4euvv+aDDz7A1dXVfP6vvvoKpZT5jzeTVq1aZfvc5PQeGgwGWrRokWdewNzXq0KFCnnmvXbtGteuXaNSpUp55r1x4wY3btzIM5+rq2ue196zZw8tW7bMFqDdfffdzJ8/n7///psmTZpYPNYULGUNZkzfBbt37zanmYLA0aNH8/777xMSEsKff/7JW2+9Ra9evahfv77uHK1atQJg+/btVj/vIsnZVVQi/0xNZj///LO6cOGCOnXqlFq2bJny9/dX5cqVU6dPn1ZKaVXjgJo0aZLu+K1btypALVmyRJe+bt06Xfr58+eVu7u76tGjh65K9KWXXlKArsksLi
5O10yQnp6uatSoocLCwtSVK1d018l8rtyazAA1ZcoU8+tevXopd3d3dezYMXPa2bNnVfny5dV9992X7flERETorjVmzBjl6uqqkpKSLF7PpHnz5qpy5cq6fD/99JMCdE0N1j7HnFhqMrtx40a2fJGRkapmzZq5nivz+Y4cOaIuXLig4uPj1WeffaY8PDxUUFCQuVmsQ4cOClCffvqp7vgvvvhCubi4qK1bt+rSP/30UwWo7du3K6WU+vDDDy02zeXnXEpp77O7u7v6559/zGn79u1TgJozZ445bfDgwcrFxSVbU7FSdz5Tb7zxhvLy8srWr2LSpEnK1dVVnTx5MscyK6VUSEiI6t27t8V9v//+uwJUbGys+ZohISFq9OjRunz5aTKzttymZh9/f391+fJlc76VK1cqQP3www/mNEd8jsPCwhSgtmzZYk47f/688vDwUOPGjTOnmZpYYmJist2r6b1av369AtTatWt1+5s2bWrxGb399tsKUImJidn2Zfb4448rf3//bOmmZzdt2jR14cIFlZCQoLZu3aruuuuubM1ROXnjjTcUoDZs2JBnXtP/x7w2a5ovvby81FNPPZUtfc2aNQpQ69aty/HY3bt3K0C98cYbunTT++vt7a1LX7hwofLz89OVcciQITn253R3d8/W3FjcSJNZCRAREUFAQAChoaH069cPb29vvvvuO6pWrarLN3z4cN3r5cuX4+vrywMPPMDFixfNW6tWrfD29iYuLg6An3/+mbS0NJ5//nldU1Z0dHSeZduzZw/x8fFER0fj5+en25efoeZGo5GffvqJXr16UbNmTXN65cqVGTBgANu2bcs2kmrYsGG6a7Vv3x6j0ci///6b43XOnTvH3r17GTJkiG4KggceeICGDRvq8lr7HG2R+a+45ORkLl68SIcOHTh+/LjVc0TVq1ePgIAAatSowbPPPkvt2rVZs2aN+S9CAA8PD5588sls99OgQQPq16+vux9Tk5vpfkzv58qVK3NshrL2XCYRERHUqlXL/Lpp06b4+Phw/PhxQKtx+v7773nwwQdp3bp1tuuZ3ufly5fTvn17KlSooLtuREQERqORLVu25PrsLl26lONf60uWLCEoKIhOnTqZr9m3b1+WLVuG0WjM9bx5sbXcffv21ZXT1Oxjel6O/Bw3bNjQfD3Qmm7q1atnvjbAihUraNasGY888ki2ezW9VxEREVSpUkXX7HjgwAH+/PNPHn/88WzHme734sWLlh6hWW7vIcCUKVMICAggODjYXIv1/vvv06dPn1zPu2XLFqZNm8Zjjz1m/hznZvDgwcTGxua5ZW12teTmzZt4eHhkSy9btqx5f05atmxJmzZteOedd1i0aBEnTpxg7dq1PPvss5QpUybbsVWrVuXuu+9m1qxZfPfdd4wdO5YlS5YwadIki+c3fWaLM2kyKwE++eQT6tati5ubG0FBQdSrVy9blaqbm1u2KvKjR4+SnJxMYGCgxfOeP38ewBw41KlTR7c/ICAgzypeUzNN48aNrb+hXFy4cIEbN25Qr169bPsaNGhARkYGp06dMlfZA1SrVk2Xz1Tm3Pr35HTPoAUamYe5WvscbbF9+3amTJnCzp07s1W3JycnWzVP1IoVK/Dx8aFMmTKEhIToAg2TqlWr4u7urks7evQohw8fJiAgwOJ5TffTt29fFi5cyNNPP82kSZPo3LkzUVFR9OnTx/z5s/ZcJlnfK9DeL9N7deHCBVJSUvL8PB09epQ///zT6utaopTKlmY0Glm2bBmdOnUy98UCaNOmDe+//z4bNmygS5cueZ7bXuXO67PtyM9xXu8VaP//85quwcXFhYEDBzJv3jxu3LiBp6cnS5YsoWzZsuZ+WJmZ3hdr/qCy9B6aDBs2jEcffZRbt26xceNGPvroozwD2r/++otHHnmExo0bs3DhwjyvD1CzZk3dH28FUa5cOYv9hG7dumXen5sVK1bQt29fnnrqKUBrphs7diybN2/myJ
Ej5nzbt2+nZ8+e/PLLL+Y/PHr16oWPjw/Tpk3jqaeeyhZQK6WK9Hxq1pCAqAS4++67Lf61nJmHh0e2ICkjI4PAwMAc/zLJ6ZdycZO5X0Jmuf2ytIW9n+OxY8fo3Lkz9evX54MPPiA0NBR3d3d+/PFHPvzwQ6s6BYPWDyCvPg6WfoFmZGTQpEkTPvjgA4vHhIaGmo/dsmULcXFxrFmzhnXr1vH1119z//3389NPP+Hq6mr1uUzs9V5lZGTwwAMP5DhhXN26dXM93t/f32LAvHHjRs6dO8eyZct0nVBNlixZUqCAyNZy2/Ozbevn2J7XHjx4MO+++y7ff/89/fv3Z+nSpfTs2dNi4G96X/L6bOf0HprUqVPH3OG6Z8+euLq6MmnSJDp16mTx9+mpU6fo0qULvr6+/Pjjj5QvX96qezP1N8qLq6trnr8rKleubHH+I1NalSpVcj2+atWqbNu2jaNHj5KQkECdOnUIDg6mSpUqus/WZ599RlBQULbn8NBDDzF16lR27NiRLSBKSkqyqk9VUSYBUSlWq1Ytfv75Z+69995c/7IICwsDtL8gM/+lc+HChTxHUZlqJQ4cOJDraA9r/7IICAjA09NT99eMyV9//YWLi0u2L9n8yHzPWWW9trXP0Vo//PADqamprFq1SvdXeH6a3vKjVq1a7Nu3j86dO+f5vri4uNC5c2c6d+7MBx98wNtvv83LL79MXFycufnL2nNZIyAgAB8fHw4cOJDnPVy7ds3qEUZZ1a9fX1cDZLJkyRICAwPNoyIzi4mJ4bvvvuPTTz+lXLlyud5vTvsKWu6snPk5Np0zr/cKtBrkFi1asGTJEkJCQjh58iRz5syxmDc+Pp5KlSrlGTzUr1+fJUuWWF2j+vLLL7NgwQJeeeWVbCMbL126RJcuXUhNTWXDhg3m0VbWeO+995g2bVqe+cLCwixO4plZ8+bN2bp1KxkZGbo/cHft2oWnp2eegb5JnTp1zLWGhw4d4ty5c7oVBxITEy3WlpmW7klPT9elnzlzhrS0NPPggeJK+hCVYo899hhGo5E33ngj27709HSSkpIArY2/TJkyzJkzR/fXX14jdUBrt65RowazZs0yn88k87m8vLwAsuXJytXVlS5durBy5UrdL4/ExESWLl1Ku3bt8PHxybNcealcuTLNmzfn888/1/XZiY2N5dChQ7q81j5Ha5n+8s78fJKTk1m0aJFN58mvxx57jDNnzrBgwYJs+27evGme1fby5cvZ9jdv3hy4M6LF2nNZy8XFhV69evHDDz/w+++/Z9tvemaPPfYYO3fuZP369dnyJCUlZfuFnlV4eDgHDhzQNU/cvHmTmJgYevbsSZ8+fbJto0aN4urVq6xatQrI/TPt5eVlMb2g5c7KmZ9jgN69e7Nv3z6Lw/yz1iQNGjSIn376iVmzZuHv70+3bt0snnP37t2Eh4fnee3w8HCUUrrRU7nx8/Pj2WefZf369brh49evX6d79+6cOXOGH3/80WLzY27s2YeoT58+JCYmEhMTY067ePEiy5cv58EHH9T1Lzp27Fi2KQSyysjIYOLEiXh6evLcc8+Z0+vWrUtiYmK26VO++uorgGwjyUzPuG3btnneQ1EmNUSlWIcOHXj22WeZPn06e/fupUuXLpQpU4ajR4+yfPlyZs+eTZ8+fQgICGD8+PFMnz6dnj170r17d/bs2cPatWvzrCJ1cXFh3rx5PPjggzRv3pwnn3ySypUr89dff3Hw4EHzL37TsM0XXniByMhIXF1ds82XYfLmm2+a578ZMWIEbm5ufPbZZ6SmpjJz5ky7PZ/p06fTo0cP2rVrx1NPPcXly5eZM2cOjRo10lWBW/scrdWlSxfc3d158MEHefbZZ7l27RoLFiwgMDCwUJYLGDRoEN988w3PPfcccXFx3HvvvRiNRv766y+++eYb1q9fT+vWrXn99dfZsmULPXr0ICwsjPPnzzN37lxCQkJo166dTeeyxdtvv81PP/1Ehw
4dzEP5z507x/Lly9m2bRt+fn5MmDCBVatW0bNnT/NQ8OvXr7N//36+/fZbTpw4ketn9+GHH+aNN95g8+bN5iawVatWcfXqVR566CGLx9xzzz0EBASwZMkS+vbtS/PmzXF1deWdd94hOTkZDw8P89xSrVq1Yt68ebz55pvUrl2bwMBA7r///gKX2xJnfY4BJkyYwLfffsujjz7KU089RatWrbh8+TKrVq3i008/pVmzZua8AwYMYOLEiXz33XcMHz7c4pQH58+f588//7Rq6ol27drh7+/Pzz//bFXnZ4DRo0cza9YsZsyYYW4SHThwIL/++itPPfUUhw8f1s095O3tTa9evXI9pz37EPXp04d77rmHJ598kkOHDlGpUiXmzp2L0WjMVgvVuXNnAN0fjqNHj+bWrVs0b96c27dvs3TpUn799Vc+//xzXW30qFGjWLRoEQ8++CDPP/88YWFhbN68ma+++ooHHngg29QmsbGxVKtWrXgPuQcZdl+c5TRTdVZDhgxRXl5eOe6fP3++atWqlSpXrpwqX768atKkiZo4caI6e/asOY/RaFTTpk1TlStXVuXKlVMdO3ZUBw4cyDZTddZh9ybbtm1TDzzwgCpfvrzy8vJSTZs21Q2lTk9PV88//7wKCAhQBoNBNwydLMPuldJmEo6MjFTe3t7K09NTderUKdsMzDk9n5zKaMmKFStUgwYNlIeHh2rYsKGKiYnJcYZfa56jJZaG3a9atUo1bdpUlS1bVlWvXl2988476j//+Y8CVHx8vFXny204vFLasPtGjRpZ3JeWlqbeeecd1ahRI+Xh4aEqVKigWrVqpaZNm6aSk5OVUkpt2LBBPfzww6pKlSrK3d1dValSRfXv3z/bkHFrzqWU9j6PHDkyW1kszYb+77//qsGDB6uAgADl4eGhatasqUaOHKlSU1PNea5evaomT56sateurdzd3VWlSpVU27Zt1XvvvafS0tJyfTZKacO+hw4dan794IMPqrJly+Y6m/cTTzyhypQpoy5evKiUUmrBggWqZs2aytXVVfeZS0hIUD169FDly5dXgG54uTXlzmm2ZdNzzPr/xd6f47CwMNWjR49sx3bo0CHbUPlLly6pUaNGqapVqyp3d3cVEhKihgwZYn5GmXXv3j3X2dTnzZtn09IdL7zwgqpdu7YuLbdnp5T2Hrq6upqnfzBNMWBpc8ZM35cvX1ZDhw5V/v7+ytPTU3Xo0MHid0BYWFi28i1atEg1a9ZMeXl5qfLly6vOnTurjRs3WrzOX3/9pfr06aNCQ0NVmTJlVFhYmBo/fny2z7/RaFSVK1fONiN4cWRQyk49S4UQogT54osvGDlyJCdPnsw2ZYRwjEceeYT9+/fzzz//WNzfokULOnbsyIcffmjV+Y4fP079+vVZu3atucZE2Nf333/PgAEDOHbsmE19q4oi6UMkhBAWDBw4kGrVqlnsQC3s79y5c6xZs4ZBgwZZ3L9u3TqOHj3K5MmTrT5nzZo1GTp0KDNmzLBXMUUW77zzDqNGjSr2wRCA1BAJIYRwmvj4eLZv387ChQv57bffOHbsGMHBwc4uliiFpIZICCGE02zevJlBgwYRHx/P559/LsGQcBqpIRJCCCFEqSc1REIIIYQo9SQgEkIIIUSpJxMzWiEjI4OzZ89Svnz5Yr94nRBCCFFaKKW4evUqVapUybaeZ1YSEFnh7NmzdlkfSwghhBCF79SpU4SEhOSaRwIiK5hWNT516pRd1skSQgghhOOlpKQQGhpq/h7PjQREVjA1k/n4+EhAJIQQQhQz1nR3kU7VQgghhCj1nB4QnTlzhscffxx/f3/KlStHkyZN+P333837lVK89tprVK5cmXLlyhEREcHRo0d157h8+TIDBw7Ex8cHPz8/hg4dqlvFGeDPP/+kffv2lC1bltDQULuuii6EEEKI4s2pAdGVK1e49957KVOmDGvXruXQoUO8//77VKhQwZxn5syZfPTRR3z66afs2r
ULLy8vIiMjuXXrljnPwIEDOXjwILGxsaxevZotW7YwbNgw8/6UlBS6dOlCWFgYu3fv5t1332Xq1KnMnz+/UO9XCCGEEEWTU2eqnjRpEtu3b2fr1q0W9yulqFKlCuPGjWP8+PEAJCcnExQUxOLFi+nXrx+HDx+mYcOG/Pbbb7Ru3RrQFgHs3r07p0+fpkqVKsybN4+XX36ZhIQE3N3dzdf+/vvv+euvv/IsZ0pKCr6+viQnJ0sfIiGEEKKYsOX726k1RKtWraJ169Y8+uijBAYG0qJFCxYsWGDeHx8fT0JCAhEREeY0X19f2rRpw86dOwHYuXMnfn5+5mAIICIiAhcXF3bt2mXOc99995mDIYDIyEiOHDnClStXspUrNTWVlJQU3SaEEEKIksupAdHx48eZN28ederUYf369QwfPpwXXniBzz//HICEhAQAgoKCdMcFBQWZ9yUkJBAYGKjb7+bmRsWKFXV5LJ0j8zUymz59Or6+vuZN5iASQgghSjanBkQZGRm0bNmSt99+mxYtWjBs2DCeeeYZPv30U2cWi8mTJ5OcnGzeTp065dTyCCGEEMKxnBoQVa5cmYYNG+rSGjRowMmTJwEIDg4GIDExUZcnMTHRvC84OJjz58/r9qenp3P58mVdHkvnyHyNzDw8PMxzDsncQ0IIIUTJ59SA6N577+XIkSO6tL///puwsDAAatSoQXBwMBs2bDDvT0lJYdeuXYSHhwMQHh5OUlISu3fvNufZuHEjGRkZtGnTxpxny5Yt3L5925wnNjaWevXq6Ua0CSGEEKJ0cmpANGbMGH755Rfefvtt/vnnH5YuXcr8+fMZOXIkoM0sGR0dzZtvvsmqVavYv38/gwcPpkqVKvTq1QvQapS6du3KM888w6+//sr27dsZNWoU/fr1o0qVKgAMGDAAd3d3hg4dysGDB/n666+ZPXs2Y8eOddatCyGEEKWW0QibNsFXX2k/jUZnlwhQTvbDDz+oxo0bKw8PD1W/fn01f/583f6MjAz16quvqqCgIOXh4aE6d+6sjhw5ostz6dIl1b9/f+Xt7a18fHzUk08+qa5evarLs2/fPtWuXTvl4eGhqlatqmbMmGF1GZOTkxWgkpOT83+jQgghhFArVigVEqIU3NkqVVIqOlqpuDil0tPtdy1bvr+dOg9RcSHzEAkhhBAFFxMDffpoYVBOQkJg9myIiir49YrNPERCCCGEKB2MRhg9OvdgCOD0aS1oiokpnHKZSEAkhBBCCIfbulULdqyhFERHF27fIgmIhBBCCOFw587Zlv/UKS2IKiwSEAkhhBDC4SpXtv2YM2fsX46cSEAkhBBCCIdr3x4qVbLtmAsXHFMWSyQgEkIIIYTDubrCfffZdkxAgGPKYokEREIIIYQoFEeP2pa/alXHlMMSCYiEEEII4XBGIxw4YH3+SpW0ZrbCIgGREEIIIRxuw4a85yDK7OOPtWa2wiIBkRBCCCEcbtEi2/IHBTmmHDmRgEgIIYQQDvfLL7blt3XeooKSgEgIIYQQDufublv+/MxbVBASEAkhhBDC4cLCrM/r7l64HapBAiIhhBBCOJjRaFuTWd++hduhGiQgEkIIIYSDbdoEV69an3/QIIcVJUcSEAkhhBDCoTZutC3/xYuOKUduJCASQgghhEOdPGlb/sLuUA0SEAkhhBDCwapVsz5vSEjhd6gGCYiEEEII4WAdO1qf95lnCr9DNUhAJIQQQggHc8kl2hjNLBQG3uIlAOrUKaRCZSEBkRBCCCEc6vz57Gke3EJhYBZjABjIEsA5/YdAAiIhhBBCOFjWIKcL67lFOV3afWwhIMA5/YdAAiIhhBBCOFj79uDvD6DYxr2sp6t53yoexIDiJGHMneuc/kMAbs65rBBCCCFKk9rGv7hIA11aO7aynXYAeHvDI484o2QaqSESQgghhEOd7TeWX5LuBEMXqEQZ0szBEMC1a7B1qzNKp5EaIiGEEEI4xpUrULEioZmSnuQ/LOZJi9
nPnSucYlkiAZEQQggh7O/zz+GJJ3RJFbnEFSrmeIizRpiBNJkJIYQQwp7S0yE4WBcMZTz/AqEhiiSD5WDIYIDQUOeNMAMJiIQQQghhLzt3QpkykJh4J+3QIVw+ms3s2dpLg0F/iOn1rFnOG2EGEhAJIYQQwh5694a2be+8btMGMjKggdaZOioKvv0WqlbVHxYSoqVHRRViWS2QPkRCCCGEyL9Tp7Kv3vrjj9CtW7asUVHw8MPaaLJz57Q+Q+3bO7dmyEQCIiGEEELkz/Tp8NJL+rQbN6BcOcv50YIfWxZ7LSwSEAkhhBDCNtevazMpZvbuuzB+vHPKYwcSEAkhhBDCeitXQq9e+rQzZ6BKFacUx16kU7UQQggh8paRAc2b64Ohfv1AqWIfDIHUEAkhhBAiLwcOQJMm+rRff4W77nJOeRxAaoiEEEIIkbPhw/XBUPXq2uSLJSgYAqkhEkIIIYQlFy9CQIA+belS6N/fOeVxMKkhEkIIIYTe/PnZg6GkpBIbDIEEREIIIYQwSUsDHx949tk7aZMnax2nfX2dV65CIE1mQgghhIBNm6BTJ33a0aNQu7ZTilPYpIZICCGEKM2Ugq5d9cHQ/fdrw+xLSTAEUkMkhBBClF7Hj0OtWvq0n3+Gzp2dUx4nkhoiIYQQojR69VV9MFS2LNy6VSqDIZAaIiGEEKJ0uXpV6zid2ccfw8iRzilPESEBkRBCCFFafPMN9O2rT0tIgKAg55SnCJEmMyGEEKKkMxq1DtKZg6GhQ7UO1RIMAU4OiKZOnYrBYNBt9evXN++/desWI0eOxN/fH29vb3r37k1iYqLuHCdPnqRHjx54enoSGBjIhAkTSE9P1+XZtGkTLVu2xMPDg9q1a7N48eLCuD0hhBDC+f74A9zc4NixO2l79sDChc4rUxHk9BqiRo0ace7cOfO2bds2874xY8bwww8/sHz5cjZv3szZs2eJiooy7zcajfTo0YO0tDR27NjB559/zuLFi3nttdfMeeLj4+nRowedOnVi7969REdH8/TTT7N+/fpCvU8hhBCi0A0ZAq1a3XndqJFWW9S8udOKVFQZlFLKWRefOnUq33//PXv37s22Lzk5mYCAAJYuXUqfPn0A+Ouvv2jQoAE7d+7knnvuYe3atfTs2ZOzZ88S9L8qv08//ZQXX3yRCxcu4O7uzosvvsiaNWs4cOCA+dz9+vUjKSmJdevWWVXOlJQUfH19SU5OxidrRzQhhBCiqElIgMqV9WkrVkCmSoXSwJbvb6fXEB09epQqVapQs2ZNBg4cyMmTJwHYvXs3t2/fJiIiwpy3fv36VKtWjZ07dwKwc+dOmjRpYg6GACIjI0lJSeHgwYPmPJnPYcpjOoclqamppKSk6DYhhBCiWJg9O3swdPVqqQuGbOXUgKhNmzYsXryYdevWMW/ePOLj42nfvj1Xr14lISEBd3d3/Pz8dMcEBQWRkJAAQEJCgi4YMu037cstT0pKCjdv3rRYrunTp+Pr62veQkND7XG7QgghhOPcugWurhAdfSdt2jSt47S3t9OKVVw4ddh9t27dzP9u2rQpbdq0ISwsjG+++YZy5co5rVyTJ09m7Nix5tcpKSkSFAkhhCi6fvoJIiP1aSdOQFiYU4pTHDm9ySwzPz8/6tatyz///ENwcDBpaWkkJSXp8iQmJhIcHAxAcHBwtlFnptd55fHx8ckx6PLw8MDHx0e3CSGEEEWOUtCunT4Y6tlTS5dgyCZFKiC6du0ax44do3LlyrRq1YoyZcqwYcMG8/4jR45w8uRJwsPDAQgPD2f//v2cP3/enCc2NhYfHx8aNmxozpP5HKY8pnMIIYQQxdLff4OLC2zffidtyxb44QfnlakYc2pANH78eDZv3syJEyfYsWMHjzzyCK6urvTv3x9fX1+GDh3K2LFjiYuLY/fu3Tz55JOEh4dzzz33ANClSxcaNmzIoEGD2LdvH+vXr+eVV15h5MiReHh4APDcc89x/PhxJk6cyF
9//cXcuXP55ptvGDNmjDNvXQghhMi/8eOhXr07r/39IS0N2rd3XpmKOaf2ITp9+jT9+/fn0qVLBAQE0K5dO3755RcCAgIA+PDDD3FxcaF3796kpqYSGRnJ3Llzzce7urqyevVqhg8fTnh4OF5eXgwZMoTXX3/dnKdGjRqsWbOGMWPGMHv2bEJCQli4cCGRWdtahRBCiKLuyhWoWFGftnChNuu0KBCnzkNUXMg8REIIIZzuiy9g8GB92sWLWu2QsKhYzUMkhBBCiFykp0OVKvpg6PnntY7TEgzZjax2L4QQQhRVO3dC27b6tIMH4X8Dh4T9SA2REEIIURT16aMPhu66CzIyJBhyEKkhEkIIIYqSU6egWjV92urV0KOHc8pTSkgNkRBCCFFUzJiRPRi6cUOCoUIgNURCCCGEs924AV5e+rSZM2HCBOeUpxSSgEgIIYRwph9+gIce0qedPg1VqzqnPKWUNJkJIYQQzqAUtGihD4b69tXSJRgqdFJDJIQQQhS2AwegSRN92q5dcPfdzimPkBoiIYQQolCNHKkPhkJDtckXJRhyKqkhEkIIIQrDxYvwv7U6zZYsgQEDnFMeoSM1REIIIYSjLViQPRi6ckWCoSJEAiIhhBDCUdLSwMcHhg27k/bii1rHaT8/pxVLZCdNZkIIIYQjbN4MHTvq0/7+G+rUcUpxRO6khkgIIYSwJ6WgWzd9MNSxo7YOmQRDRZbUEAkhhBD2Eh8PNWvq02JjISLCOeURVpMaIiGEEMIepkzRB0Pu7nDrlgRDxYTUEAkhhBAFcfWq1nE6szlzYNQo55RH5IsEREIIIUR+ffstPPqoPi0hAYKCnFMekW/SZCaEEELYKiMD6tbVB0NPPaV1qJZgqFiSGiIhhBDCFnv2QMuW2dOaN3dKcYR9SA2REEIIYa0nntAHQ/Xrg9EowVAJIDVEQgghRF4SEqByZX3at99C797OKY+wO6khEkIIIXLz0UfZg6GrVyUYKmEkIBJCCCEsuXUL3Nxg9Og7aVOnah2nvb2dVizhGNJkJoQQQmT1008QGalPi4+H6tWdUhzheFJDJIQQQpgoBffdpw+GevTQ0iUYKtGkhkgIIYQAbSX6evX0aVu2QPv2zimPKFRSQySEEEJMnKgPhipUgLQ0CYZKEakhEkIIUewZjbB1K5w7pw0Ia98eXF2tODApSQt+Mlu4EIYOdUQxRREmAZEQQohiLSZGGwh2+vSdtJAQePJJrR/0tWvQrh08/7y2AL3Zl1/CoEH6k128CP7+hVJuUbQYlFLK2YUo6lJSUvD19SU5ORmfrCsaCyGEcAqjEd54A6ZNs/6YTp1g3ep03OtWhzNn7uwYORI+/tjuZRTOZcv3t9QQCSGEKHZiYuCZZ+DyZduOuxH3C+5e4frEAwegUSP7FU4US9KpWgghRLESE6NNEm1rMPQ1j/ELd4KhU8GttVXrJRgSSA2REEKIYsRohBdesO2YqpzmNKG6tJ78wLoLPblxO0u/IlFqSQ2REEKIYmPgQH3Xn7xM5J1swZAn11lDT4xGmDvXzgUUxZbUEAkhhCgWJk6Er7+2Lm85bnADL/3xvMO7TNSlHTtmr9KJ4k4CIiGEEEVeWhp88IF1eXuwmtU8qEsL4RRnCMmW99o1e5ROlATSZCaEEKLImztX6z+UO8VvtNYFQ1/zGAaUxWAIIDbWmvOK0kACIiGEEEXe0aO572/IQRQutGa3Oa0Nv9CP3NvYzpzRZrgWQgIiIYQQRd7x4znvm8MoDtLY/PoUIbhxm19pY9W5z50raOlESSB9iIQQQhRpRiPExWVP9+ciFwnQpT3OFyzhcZvOX7lyQUonSgqpIRJCCFGkbdwIqan6tKEszBYM+XHF5mAoNFQWtBcaqSESQghRaPKzKv0XX9z5txu3OU8gFUgyp81kAi8yM1/l6dcv7+uL0kFqiIQQQhSKmBioXl1bYHXAAO1n9epaem6uXtV+tmMrt3HXBUN1OZLvYAhg2TIZZS
Y0EhAJIYRwuOXLtfXHTp/Wp585A3365B4UtW0Lq+nBVu4zp22iAwYyOErdApXr1CkZZSY0RSYgmjFjBgaDgejoaHParVu3GDlyJP7+/nh7e9O7d28SExN1x508eZIePXrg6elJYGAgEyZMID09XZdn06ZNtGzZEg8PD2rXrs3ixYsL4Y6EEEIAfPst9O9veZ9S2s/o6Bxqak6cYMJEAz340Zz0AD/RiU2AwS7lk1FmAopIQPTbb7/x2Wef0bRpU136mDFj+OGHH1i+fDmbN2/m7NmzREVFmfcbjUZ69OhBWloaO3bs4PPPP2fx4sW89tpr5jzx8fH06NGDTp06sXfvXqKjo3n66adZv359od2fEEKUVjEx8OijuTdLKZVDTc20aVCjhvllGmXw4BY/84BdyyijzAQAysmuXr2q6tSpo2JjY1WHDh3U6NGjlVJKJSUlqTJlyqjly5eb8x4+fFgBaufOnUoppX788Ufl4uKiEhISzHnmzZunfHx8VGpqqlJKqYkTJ6pGjRrprtm3b18VGRlpdRmTk5MVoJKTk/N7m0IIUeqkpioVEKCUFvLkvS1d+r8DU1Ky7RzFR1afx9rNYFAqNFSp9HSnPibhQLZ8fzu9hmjkyJH06NGDiIgIXfru3bu5ffu2Lr1+/fpUq1aNnTt3ArBz506aNGlCUFCQOU9kZCQpKSkcPHjQnCfruSMjI83nsCQ1NZWUlBTdJoQQwnoxMVClCly4YP0xlSujta/5+OjSP3k1gY953q7lM/yvtW3WLBllJjRODYiWLVvGH3/8wfTp07PtS0hIwN3dHT8/P116UFAQCQkJ5jyZgyHTftO+3PKkpKRw8+ZNi+WaPn06vr6+5i00NDRf9yeEEKVRTIzWgfrSJeuP8fPJoMNz9bX2NZMnngCl+Hl/UI7HWWPwYAjJspRZSIgWe2XqhSFKOafNQ3Tq1ClGjx5NbGwsZcuWdVYxLJo8eTJjx441v05JSZGgSAghrGA0wrBhth3TnD3sSWkJmSvj//gDWrQAwMsr/+VxcYEFC7RaIFvnPxKli9NqiHbv3s358+dp2bIlbm5uuLm5sXnzZj766CPc3NwICgoiLS2NpKQk3XGJiYkEBwcDEBwcnG3Umel1Xnl8fHwoV66cxbJ5eHjg4+Oj24QQQuRt0ybbaob+j6fYQ8s7CfXqaVHV/4IhgGbN8l+ecePA3V0Lfjp21Ea7dewowZDIzmkBUefOndm/fz979+41b61bt2bgwIHmf5cpU4YNGzaYjzly5AgnT54kPDwcgPDwcPbv38/58+fNeWJjY/Hx8aFhw4bmPJnPYcpjOocQQgj7yfLrNkeBJKIw8BSL7iR+8w389ZdWrZNJlSq2l8PFBSZMgJn5n7NRlDJOazIrX748jRs31qV5eXnh7+9vTh86dChjx46lYsWK+Pj48PzzzxMeHs4999wDQJcuXWjYsCGDBg1i5syZJCQk8MorrzBy5Eg8PDwAeO655/j444+ZOHEiTz31FBs3buSbb75hzZo1hXvDQghRwsXEaJ2U8zKKOczhBX1iSgqUL28xf9WqtpXjiSfgs8+0miEhrFWk1zL78MMPcXFxoXfv3qSmphIZGcncuXPN+11dXVm9ejXDhw8nPDwcLy8vhgwZwuuvv27OU6NGDdasWcOYMWOYPXs2ISEhLFy4kMjISGfckhBClEimjtS5cSeVa3hThjuT507jNTrETaOj5VgI0Pr7hIRkn+XaEoNBq6WSJjFhK4NSpnlCRU5SUlLw9fUlOTlZ+hMJIUQWRiMEBeXed6gzP2ebULEGxzGG1iA+Pu8AJiZGW+LD2m+suDitr5Ao3Wz5/nb6PERCCCGKt9w7Uis20UEXDK2hOwYUJ6hh9TxAUVHaMPmKFa0rkyzHIWxVpJvMhBBCFH2bNllOr83RbIuv3sdmtnIfrq5aH2pb5gGKigJfX8gy165FshyHsJXUEAkhhCiQjIzsaTN4URcMJeGLO6nmFevffjt/ky
J27Kj1JzLksK6rwQChoVq/IyFsIQGREEKIAvH3v/NvX5JQGHiRO+Pdn2E+FUjiNneGfUVH5+9arq4we7b276xBkSzHIQpCAiIhhBAFcvKk9nMAS0iigm5fJS6wkGd0aabJEvPL1J8o63B8WY5DFISMMrOCjDITQgjLjEaoEmTk90vVCeXOuPhPGMEoPsmW/8EHYdUq+11bluMQubHl+1s6VQshhMi3vfN/JfFSG11aY/ZzkMbZ8vbsab9gCO4sxyGEPUhAJIQQpYzdalb69qXVN9+YX+6mJa35Hcje47llS/jhh/yXWQhHk4BICCFKkZgYGD1aP+tzpUowdy48+qiVJzlzRuuwk8mDrGI1D+Z4yPvv56OwQhQi6VQthBClhGl5jaxLYFy8CI89BhMnWnGSd9/NFgyVd7meazDk6gpt2+ajwEIUIgmIhBCiFDAaYdiw3PO8+642SsuiGze0ce2Zo6YZM9gUp7iW4ZnntXfssK28QhQ2CYiEEKIUyH15jTtGjNACGJ01a8DLS5928iS8+CKnTll3/X//tS6fEM4iAZEQQpQCGzdal+/CBa3DNaCtpNq6tTY8zMS0wmpoKAC7dll33pEjtSY7IYoq6VQthBAlRG6jx7Zts/48K1dCx8BD0KiRfsfOnXDPPboka2eyu35di6UcNXGizEkkCkoCIiGEKAEsjR4LCYEPP4QKFWDLFuvPVXPW8zDr4zsJVapobV5u2b8y6tSxrZzR0fDww/YNVnK699mzZdZqYT27zFSdlJSEn5+fHYpTNMlM1UKIoiwm5k5LVkFU5BKXqKRP/O9/YdCgHI9JSwNPTwv9jnIRF2e/CRVzunfTumaylEfpZsv3t819iN555x2+/vpr8+vHHnsMf39/qlatyr59+2wvrRBCiHwzGrXakYIGQ0/xf9mCoW0/XMk1GAJtTbKxY2271rlztpbOstzu3ZQWHW1bsCZKL5sDok8//ZTQ/3Wmi42NJTY2lrVr19KtWzcmTJhg9wIKIYTI2dat2ecVsoUbt7lERf6Pp81p7zIeA4pTV/2sOsfMmTBhArhY+Y1y9Gg+CmpBXveuFJw6lamTuBC5sDkgSkhIMAdEq1ev5rHHHqNLly5MnDiR3377ze4FFEIIkbOC1La0Yyu3caciV8xpdTnCRN4FtM7J1po5E65dA2t6FSxYYJ9aG2vv3V41UqJkszkgqlChAqf+N/HEunXriIiIAEAphVHqJYUQolAdOZK/436gJ1u5z/x6C+0xkMFR6mIwaKPq27e37ZzlysG4cXnnO33aPrU21gZstgR2ovSyeZRZVFQUAwYMoE6dOly6dIlu3boBsGfPHmrXrm33AgohhMjOaISBAyFTl06rhHGCE9TQpXVhPbF00aXNmpW/kWDWjjqzR61N+/baaLIzZyz3IzIYtP22BnaidLK5hujDDz9k1KhRNGzYkNjYWLy9vQE4d+4cI0aMsHsBhRBC6MXEQFCQ7cHQq7yuC4aMuFCWm9mCoa+/zv/IrMKstXF11YbWw51RZSam1/kN7ETpY/Ow+y1bttC2bVvcssxHkZ6ezo4dO7jvvvtyOLL4kmH3QoiiwrRAqy28uMY1yuvSRjOLjxhtMX9BhsUbjVC9et61NvHx9gtULM1DFBqqBUMy5L50s+X72+aAyNXVlXPnzhEYGKhLv3TpEoGBgSWyH5EEREKIosAUbNgyquwRYohBH0EFc45EgnM8ZulS6N8/n4XkztxAoA+KHDk3kMxULSxx6DxESikMWesm0QIir6yL/wkhhLAbW4bYG8jgIA11wdDnDMaAyjUYgoI3Z0VFaUFP1ar69JAQx02U6Oqq1Wr176/9lGBI2MrqTtVR//sEGwwGnnjiCTw8PMz7jEYjf/75J23btrV/CYUQQgDw3nvW5WvGXvbSQpfWkt3soWWex7q52acTclSUtkSHI2ptpDZIOILVAZGvry+g1RCVL1+ecuXKmfe5u7tzzz338Mwzz9i/hEIIIZg4Ed
asyTvfQoYylP+YX/9NHRpwmAysixiqVbNfcGGqtbEnWbdMOIrVAdGiRYsAqF69OuPHj5fmMSGEKCRpafD++7nnCeA85wnSpT3KN3zLozZdq0uXvPM4S07rlp05o6XLumWiIOyyuGtJJ52qhRAFVZBmnvvv10Z+5WQEn/AJo3Rp5UnJNrLMGjduaBMsFjV5dSh3xOg1Ufw5tFN1YmIigwYNokqVKri5ueHq6qrbhBBC3GE0wrRp4OcHnTrBgAHaz7AwrcYjLxMn5hwMuZPKLTx0wdDrvIoBla9g6N57i2YwBLJumXA8m2eqfuKJJzh58iSvvvoqlStXtjjiTAghhBbwDBmirfGV1Zkz2nxCK1bk3MyTlpZzR+r72cAGInRpNTlGPDXzXd6NG/N9qMPJumXC0WwOiLZt28bWrVtp3ry5A4ojhBAlg7UTKA4bpo3GslTB/uyzliY3VGygM/dzp9poLV3pzo9Awf5A3bHD/p2g7UXWLROOZnOTWWhoKNLtSAghcmY0aiOhrHHpEmzaZPkcy5fr02pzFIWLLhjqwCa6s5aCBkNQtGtXTOuW5dQokd8FaYUwsTkgmjVrFpMmTeLEiRMOKI4QQhR/tkygCJabqrZuhevX77x+m8kcpa75dTI+uJPKFjoUoKR6Rbl2RdYtE45mc5NZ3759uXHjBrVq1cLT05MyZcro9l++fNluhRNCiOLo1KmC5zfV1viQTDJ+un3D+IwFDMtf4XJQHFaFN82AbWkeIlm3TBSUzQHRrFmzHFAMIYQoOWbOtC1/errWRJa5dqNyZejPUpYyUJe3Ehe4RCU7lFJv9uziUbviyBmwRekm8xBZQeYhEkJYKy0NMq1sZDV/f5g//3+1HEYjqmZNDCdPmvfP4zlGMM9+Bc0kOho+/NAhpxbCqRw6DxHAsWPHeOWVV+jfvz/nz58HYO3atRw8eDA/pxNCiBJj7tz8HXfpkjYqLe6dX8HNTRcMNWa/w4IhgJ49HXZqIYoNmwOizZs306RJE3bt2kVMTAzX/jfBxr59+5gyZYrdCyiEEMXJsWP5P/Yr+tFpUhvz6z00xwUjB2lsh5IJIXJjc0A0adIk3nzzTWJjY3F3dzen33///fzyyy92LZwQQhQ3tWrZfkwVzqAw0I+vzWn731xJS/ag8leRb5OEBIdfQogiz+b/afv37+eRRx7Jlh4YGMjFixftUighhCiuRoywLf843uMMIbq0iSOu0XDSQ1SpYseC5eLChcK5jhBFmc2jzPz8/Dh37hw1atTQpe/Zs4eqVavarWBCCFEcubpqm9GYe76y3OQmnrq0ybzNDCZjmAf/XoQrVxxY0EwCAgrnOkIUZTbXEPXr148XX3yRhIQEDAYDGRkZbN++nfHjxzN48GBHlFEIIYqNrVvzDoa68WO2YCiUk8xgMqAt1/HNN3DzpqNKqVeQfk9ClBQ2B0Rvv/029evXJzQ0lGvXrtGwYUPuu+8+2rZtyyuvvOKIMgohRLGR+/IXil3czY/0MKfE8AgGFKcJdXjZcjJ1qrb2mhClWb7nITp16hT79+/n2rVrtGjRgjp16ti7bEWGzEMkhLDWpk3QqVP29AYc4hCNdGnh7OAXwgunYLkwGLTZnuPji+8Eh0ajTNYosnP4PESgLfLavXt3evfuzfXr17lSWI3dQgjhIEajFtB89ZX2M6+mL0tMi5BmNovRumDoLJVx43ahBEMGA/Ttm3sepbTlQ7ZudXhxHCImBqpX1wLRAQO0n9WrS62XsI3NAVF0dDT/93//B4DRaKRDhw60bNmS0NBQNllaslkIIYoBe32pZl6EtAKXURgYzUfm/YP5nKqcxWj7mJZcTZuWPRALDdXW/nr4YevOUZRXu89JTAz06ZN9Md0zZ7R0CYqEtWwOiL799luaNWsGwA8//MDx48f566+/GDNmDC+//LJN55o3bx5NmzbFx8cHHx8fwsPDWbt2rXn/rVu3GDlyJP7+/nh7e9O7d28SExN15zh58i
Q9evTA09OTwMBAJkyYQHp6ui7Ppk2baNmyJR4eHtSuXZvFixfbettCiBIsJkabJTrrl+rp0/n7Uo2Kgt9H/IfL+OvSK3CZL7D/4JPQUHj5ZThxAuLiYOlS7Wd8vFYWa1exL8qr3VtiNGoLvVrq+GFKi47OX02fKIWUjTw8PNSpU6eUUko988wzavTo0UoppY4fP67Kly9v07lWrVql1qxZo/7++2915MgR9dJLL6kyZcqoAwcOKKWUeu6551RoaKjasGGD+v3339U999yj2rZtaz4+PT1dNW7cWEVERKg9e/aoH3/8UVWqVElNnjzZnOf48ePK09NTjR07Vh06dEjNmTNHubq6qnXr1lldzuTkZAWo5ORkm+5PCFH0pacr5eWllPYVankLDdXyWSUtTamKFXUneI+xuZ6/oNuKFXnfY0iIUgaD5eMNBhvvsYiIi7Pu+cTFObukwlls+f62OSCqVq2aWr9+vUpPT1ehoaFq9erVSimlDhw4oPz8/GwvbRYVKlRQCxcuVElJSapMmTJq+fLl5n2HDx9WgNq5c6dSSqkff/xRubi4qISEBHOeefPmKR8fH5WamqqUUmrixImqUaNGumv07dtXRUZGWl0mCYiEKLnuvdeOX6rbtmU7sC5/OTQYmjLFuvv85pucgyGDIe+gqihautS6Z7R0qbNLKpzFlu9vm5vMnnzySR577DEaN26MwWAgIiICgF27dlG/fv1811QZjUaWLVvG9evXCQ8PZ/fu3dy+fdt8foD69etTrVo1du7cCcDOnTtp0qQJQUFB5jyRkZGkpKSYF5rduXOn7hymPKZzWJKamkpKSopuE0KUPN98A9u3W5f3zJk8Mjz0ELRrd+f1vfdivJ3BybL18l0+a8yZk3eTXkwMjB1reV9IiNbPKCrK/mVztJLaFCicw+ZefVOnTqVx48acOnWKRx99FA8PDwBcXV2ZNGmSzQXYv38/4eHh3Lp1C29vb7777jsaNmzI3r17cXd3x8/PT5c/KCiIhP8tvJOQkKALhkz7Tftyy5OSksLNmzcpV65ctjJNnz6dadOm2XwvQojiw2iEoUOtz5/T8hbG4//iWqu6Lm3fzPUcCulC4GZtlJcjXb6s9XPKKagxdTrOaYKVDz4onsEQ3BnRd+aM5fszTSfQvn3hl00UP/ka5tCnTx9A6/RsMmTIkHwVoF69euzdu5fk5GS+/fZbhgwZwubNm/N1LnuZPHkyYzP9OZWSkkJoqPMmTRNC2N/WrXDtmvX5LS1vcaj/GzRc9pr5dQYGPLlB6sSydiihbaKjoWdP2LHjzlw8bdvm3OkYtIBh7Fh45JHiOWePaURfnz7avWS+T1MgOmtW8bw3UfhsbjIzGo288cYbVK1aFW9vb44fPw7Aq6++ah6Obwt3d3dq165Nq1atmD59Os2aNWP27NkEBweTlpZGUlKSLn9iYiLBwcEABAcHZxt1ZnqdVx4fHx+LtUMAHh4e5pFvpk0IUbL8+69t+XVLNV67BgaDLhiK5kNcySCVwg+GTPMIVa2qnzagatXsI+csHVdc5x8CrXbr22+zvD8U76ZA4Rw2B0RvvfUWixcvZubMmbi7u5vTGzduzMKFCwtcoIyMDFJTU2nVqhVlypRhw4YN5n1Hjhzh5MmThIdrk5mFh4ezf/9+zp8/b84TGxuLj48PDRs2NOfJfA5THtM5hBCl04oVtuU3N7t89x2UL6/bV5mzzCbaLuUqiIsXc3+dk+I4/1BmUVE5TzkghNVs7bFdq1Yt9fPPPyullPL29lbHjh1TSmkjwGwdZTZp0iS1efNmFR8fr/788081adIkZTAY1E8//aSU0obdV6tWTW3cuFH9/vvvKjw8XIWHh5uPNw2779Kli9q7d69at26dCggIsDjsfsKECerw4cPqk08+kWH3QghVu7b1I7k8PZVSRqNSDRvqdnzOIIeOICusTYali5LKlu9vm/sQnTlzhtq1a2dLz8jI4Pbt2zad6/z58wwePJ
hz587h6+tL06ZNWb9+PQ888AAAH374IS4uLvTu3ZvU1FQiIyOZO3eu+XhXV1dWr17N8OHDCQ8Px8vLiyFDhvD666+b89SoUYM1a9YwZswYZs+eTUhICAsXLiQyMtLWWxdClCCpqdbnnRi5D1yb69Ja8Tt/0Mq+hSpk0ulYiDtsXty1VatWjBkzhscff5zy5cuzb98+atasyeuvv05sbCxbi3NjdA5kcVchSp7gYMjSvdCiBTzN09zpH/kPtajHETIo3j11TZ2OpZ+NKMls+f62uYbotddeY8iQIZw5c4aMjAxiYmI4cuQI//3vf1m9enW+Cy2EEIUlJibvYCiA85xHP2XHY3zNch5zYMnyVr48XL1653VAQM5TAmSWNV9IiDYCS4IhITQ21xABbN26lddff519+/Zx7do1WrZsyWuvvUaXLl0cUUankxoiIUoOo1FbtDW30VfDmctcRurSAj2SuZDq/P//3t766QKqVoVbt+DSpZyPCQ2Ff/7RD8lv316Go4uSz2E1ROnp6bz99ts89dRTxMbGFqiQQgjhaEajNqQ8cxCwdWvOwZA7qSTjS1nudDB6k5d5lTfBhj5HjpR17qSzZ3OeZ8ikXz9wd4eOHR1WLKew9P5KkCfyy6Zh925ubsycOTPbavJCCOEMRiNs2gRffaX9zLyqeUyMVjOSeV6e0FBYudLyuTqxkVTK6oKhmhzTgqEizJo6/mXLSt6K7zExWk1f5ve3evW8lzERIic2z0PUuXNnp88kLYQQMTEQFqb/QgwL09K//hp6984+v865c1q/GT3FBu5nI53NKevpgoEM4qnp6NsoFMV98sWsTMuRZK3pO3NGS5egSOSHzZ2qu3XrxqRJk9i/fz+tWrXCy8tLt/+hhx6yW+GEEMKSmBgt4MnqzBnL6TmpyTGOoZ9GpCNxbKZjwQpoZ1mXpciP4j75oonRmPNyJEppzyo6Gh5+WJrPhG1s7lTt4pJzpZLBYMBY0uplkU7VQhQlRiMEBeXeidgab/ESLzHd/Poq3vhzidu453KUc0ydqm0FERdXMvoQbdqk1QbmpaTcrygYW76/bW4yy8jIyHEricGQEKJo2bSpYMGQD8koDLpg6Dnm4cNVi8FQZKRjaxoqVoRp02DcuOzXcXWFCRPglVe0YfKmuYNsYTBofadKyuSL1tZ0lZQaMVF4bA6IhBDCmT75JP/H9uMrkvHTpQVwns94LsdjunaFwMD8XzM3lSrBp59C48bwwQfZOz4bjfDee1pH8NmztbSsQVHm1zntK0krvleubN98QpjkKyDasGEDPXv2pFatWtSqVYuePXvy888/27tsQgihYzRqa6vaygUj8VTnKwaY0z7lWQwoLhKQ83EuMGIE1KuXn9Lm7dIleOwxGDYs9z5Cpj4xllZ1r1pVW6h2xYrSseJ7+/a515aVtBoxUXhsDojmzp1L165dKV++PKNHj2b06NH4+PjQvXt3PinIn25CCJGHadNsP6YVv2PEjer8a05rwp8M59M8jx03Tpu/p1Yt269rDVMQlFsToFL6UWJZAyfT69Ky4rura961ZSWpRkwUIltXjq1ataqaM2dOtvSPP/5YValSxdbTFQuy2r0QzpeerpSbm22ruC+hvy5hD82UAaNVx7q7a9dUSqmHHsrfKvJDhij1yiv2WZE+OlopgyF7usGgbStWOPXtKXQrVigVEqJ/FqGhpe85iNzZ8v1tcw1RUlISXbt2zZbepUsXkpOT7RCiCSFEdhs3grVzwlbmLAoDA/jKnPYw39OCvSgrK8bT0u7Uyty4YWtpNZGR0LBh/o7N6ssvcx5qDlqzWmka11JaasRE4bE5IHrooYf4zkIj/sqVK+nZs6ddCiWEEFnNn29dvrG8z1n0nWm8uMYqHrb5mqaRSq1b23wooHXsLWjnXoNBW5j14sWc82RtVistXF21ofX9+2s/pZlMFITNEzM2bNiQt956i02bNhEeHg7AL7/8wvbt2xk3bhwfffSROe
8LL7xgv5IKIUq1dety31+Wm9zEU5f2Em8xnZfyfU3T6LJOnWDGDOuPMxi0jr+mjr0hIdqkkbZOrmjqEzNwoKUZtrNz9lBzWVtMFGc2T8xYo0YN605sMHD8+PF8FaqokYkZhXA+V1fIyLC8rytrWUt3XVooJzlNaIGuWbUqfPSRtsJ8ZKT1xxkM+tFdpqUmwLagKDRUC4QqViz6kxHGxGgzSGdeTiMkROsALc1Ywlls+f62OSAqjSQgEsL5LAdEip2Ecw+7zCnf0YsoLI/N9/eHUaOsH61mqqGJitKGtVvDFMRkDQIsBQyWBATAhx9qwZiphsWa2bn9/SEx0Tk1MqaAL+u3ien5lbSh/6L4cOhM1UII4QweHvrX9TmMwkUXDLVlu8VgyNtbC4ISE+Gee6y/pukL/qefrMs/aFDOHXujomDx4rzPceGCFgwVlz4xea0tBqWvw7coniQgEkIUGUajtjTHV19pPzN/iYaE3Pn3h0RzmDvDtxIIwo3b7KStxfNev67NBu3qCkuW2FYmpeDqVevyDhmSexBz/rx158naF2jr1ryXK7l0ybpO1bk94/zYujX3Wq/S2uFbFD8SEAkhioTlyyE4WOsrM2CA9rN6da05BrQlLCpwGYWBaGabjxvCYiqTgDGPMSKmWopr1/JXPm/v3Pf7++fdfye/y07Ya/2umBjtmeb0jPND1hYTJYUEREIIp5s4UVvCIuvQ8tOntb4pMTHQ88IiLuOv21+By/yXIXmeP3MtRbt2+SvjhAm57//007ybuPK77IQ91u8y9fPJWptz5sydZ5wfsraYKCkkIBJCONW338K77+a831XdpsOjAbg8/ZQ57UOiMaBIooJN1zp3Dp5/XlujzFqmIOXll7WO1Zmb7jIbMybvoMLVVVvE1VJ/m9yWnSjo+l2O7Ocja4uJksLmgGjdunVs27bN/PqTTz6hefPmDBgwgCtXrti1cEKIks1ohKFDc97flu3cxh3/jDtVRz/NPsw03w/zdb3KlbW1ycaNsy5/1iAlKkoLaCyxpqYlJgbGjrW8L7eFWAu6fpcj+/nI2mKipLA5IJowYQIpKSkA7N+/n3HjxtG9e3fi4+MZm9P/dCGEsGDTJvjfr5NsvqMX27nTvnWhTjhkZNDlhfpcugQ//wyvvKJt69fbVksxc6bWBJZXTVHWIMVozDmgyaumJacmK5MPPsh9aHpUlOXV7q1Z0d7R/XwKUjYhigqb5yHy9vbmwIEDVK9enalTp3LgwAG+/fZb/vjjD7p3705CQoKjyuo0Mg+REI7x8svw9tv6tFBOcpIwXVpX1jLh56507pzzuXKa/DC3uXDS0mDuXDh2TFvR/tlnYdeunGda3rQpfxMkGo0QFqbVIllimtk6Pj7vmpT8zAad33LbSmaqFkWNLd/fNi/d4e7uzo3/rXT4888/M3jwYAAqVqxorjkSQghrnDypf/0Sb/EWr+jSynGDW5RjbB79W0y1FJZmS7Y0USJozWfR0fq03AKC/Na0vPVWzsEQ6Jus8gpITOt32cLUzyen5UOyLjWSX/kpmxBFhc0BUbt27Rg7diz33nsvv/76K19//TUAf//9NyE59TYUQggLTL8yPLnOdfTj2sfwAbMYY369dSt06ZL7+aKi4OGHHVdLkZ8RVTExMGWKdcc5ami6qZ9Pnz5a8GOpBk36+YjSzuY+RB9//DFubm58++23zJs3j6r/azReu3YtXbt2tXsBhRAl1+nT8DDfZwuGKnNWFwzZwpEroNs6oso0ustajhyaLv18hMidrGVmBelDJIT9vTghg8ffa0YTDpjTvmQgg/jSYv6ffybXPkSFJa+FWr/5Bh59VPu3tX13QAukrOlDVFDSz0eUJg7tQwRw7NgxFi1axLFjx5g9ezaBgYGsXbuWatWq0ahRo3wVWghResS+/yfvvNdMl9aa39hNa4v5y5YtOn1TcuqrZDJ27J0h+rY0gRVWk5X08xHCMpubzDZv3kyTJk3YtWsXMT
ExXPvfPPj79u1jirUN5UKIUss4dBgPjL8TDB2jJq6k5xgMAbz4YtGqxYiK0laktyTzfETWNoFNmyZNVkI4m81NZuHh4Tz66KOMHTuW8uXLs2/fPmrWrMmvv/5KVFQUp3Ob/auYkiYzIQrOeO48rlWCdGl9WcY39M31OG9vSEoqWgGR0aitAZbbr7uQkDvD+XMa3WXKd+JE0bo/IUoKW76/ba4h2r9/P4888ki29MDAQC5mXYhICFEiFHSF9N3PfJotGPIhOc9gCODzz4tesJDXzM+g7Z8xI/dZnA0GbX9Ruz8hSiObAyI/Pz/OWWgY37Nnj3nEmRCi5CjQCulpaaS6edJq4XBz0lu8hAHFVfKubY2KKppNSdb2DTL1IpDRXUIUfTZ3qu7Xrx8vvvgiy5cvx2AwkJGRwfbt2xk/frx5kkYhRMlgGlGVtbnHtAp9rl/ocXFw//14ZEqqxT8cp5bV1x8xwuYiFwpbhsdHR2ujxxw5P5IQouBs7kOUlpbGyJEjWbx4MUajETc3N4xGIwMGDGDx4sW4lsD/4dKHSJRG1vST8feHxMQsX+xKaTMo/vyzOSmWCLrwE5DDBD4WFMW+QybWPJvMCrokhhAifxzah8jd3Z0FCxZw7NgxVq9ezZdffslff/3FF198USKDISFKK2v6yVy6pC1LYXbsmLZiaqZgqBMb6UIstgRDAOPHF81gCPQrvFvDUTNQCyHsJ1/zEAFUq1aNatWq2bMsQogiJLe1tzKbMQPuuQeqzn2ZRivvrNR6y9UTH+MVbuOer+svWABNmhTdPjZRUdpweWtmG3HkDNRCCPuwqsls7NixVp/wgw8+KFCBiiJpMhOlhWn19/XrYd06644pTwop+OrS9jwzlw9vDeeLL/JfltxWqS8q8mo6s2UVeyGE/dl9puo9e/ZYdWFDTgv8CCGKvIkT4d13bTumL8tYRn9dWiCJXFwYSO/eBSuPUlpAER2tdUguigFF5kVTQRZNFaI4k7XMrCA1RKKkMdUEmSYOjI+Hjz6y/ngXjBylDjWJN6fN5xmeZb4DSlv0OyXHxGRfyiM0VAuGimrtlhClgcPXMhNCFF8TJsAHH0BGRv6Ob8nubMtsNGUf+2lqh9JZVtQ7JUdFybB6IYo7CYiEKEW6dbO+b5AlX/A4j7PE/PpPmtCcvSjbB6zapDh0SpZFU4Uo3iQgEqKU8PSEmzfzd2ww5zhHFV1aL75jJb0KXrBcmDolt2/v0MuUaEaj1FwJYQ3H/lknhCgSDIb8B0PRfJgtGPLiWqEEQyCdkguiQMuuCFHKSEAkRAnn6Zm/4zy4hcLAh9yZduNl3sSA4gZediqdplOn7Gt9VagAU6dqfXOE7UzLrmSdEuDMGS1dgiIh9JwaEE2fPp277rqL8uXLExgYSK9evThy5Iguz61btxg5ciT+/v54e3vTu3dvEhMTdXlOnjxJjx498PT0JDAwkAkTJpCenq7Ls2nTJlq2bImHhwe1a9dm8eLFjr49IZwuISF/NUORrOMW5XRp1fiXt3nZTiXTe+YZ+PdfbaLDihW1tMuXtUkPpUbDdkajNurN0hhiU1p0tJZPCKFxakC0efNmRo4cyS+//EJsbCy3b9+mS5cuXL9+3ZxnzJgx/PDDDyxfvpzNmzdz9uxZojKNYzUajfTo0YO0tDR27NjB559/zuLFi3nttdfMeeLj4+nRowedOnVi7969REdH8/TTT7N+/fpCvV8hCluzZrYeodhBOOvoZk75nocxoDiF42amr1wZVq7UaoQuX9bvkxoN2+W17IpScOqUlk8I8T+qCDl//rwC1ObNm5VSSiUlJakyZcqo5cuXm/McPnxYAWrnzp1KKaV+/PFH5eLiohISEsx55s2bp3x8fFRqaqpSSqmJEyeqRo0a6a7Vt29fFRkZaVW5kpOTFaCSk5MLdH9CFDZXV6W0r7+8t3oczpbYlm1WH5/fLTRUqdRUpUJCcs5jMGj50tO1+0pPVyouTqmlS7Wfpn
ShWbrUume/dKmzSyqEY9ny/V2k+hAlJycDUPF/dea7d+/m9u3bREREmPPUr1+fatWqsXPnTgB27txJkyZNCAoKMueJjIwkJSWFgwcPmvNkPocpj+kcWaWmppKSkqLbhCjqjEbYsAFefVXbNmywvknkA8bwFw3Mr88TgBu32cG9DirtHbNmwY4d1tdoSEfhvFk7TUFxmM5AiMJSZAKijIwMoqOjuffee2ncuDEACQkJuLu74+fnp8sbFBREQkKCOU/mYMi037QvtzwpKSnctNDBYvr06fj6+pq30NBQu9yjEI4SEwOVKkFEBLz5prZl+RvAIj+uoDAwhlnmtCdYRBDnMRbCrBxRUdpm7cSLK1dKR2FrtG+vTVeQ02pKBoM2k7ZMZyDEHUUmIBo5ciQHDhxg2bJlzi4KkydPJjk52bydOnXK2UUSIkcxMdC7NyQl2XbcYD7nChV1aRW5xOc8Ybey5aVhQ+2ntTUVX34pHYWtYVpjDbIHRTKdgRCWFYmAaNSoUaxevZq4uDhCQkLM6cHBwaSlpZGU5Td9YmIiwcHB5jxZR52ZXueVx8fHh3Ll9CNpADw8PPDx8dFtQhQlaWna8hsPPYTNi6i6kk4CQbrAZxajMaCyBUiO5uenBTDW1GgEBMDFizmfSzoK60VFwbffZp/OICRES5c11oTQc2pApJRi1KhRfPfdd2zcuJEaNWro9rdq1YoyZcqwYcMGc9qRI0c4efIk4eHhAISHh7N//37Onz9vzhMbG4uPjw8N//fnZ3h4uO4cpjymcwhRnEycCB4eMG4c/PCDbceGs4N0yhDEnf8vDTikazIrTOPHa/1/Vq7Mu0Zj4EDrzlnU1z0rTFFRcOKEtjju0qXaz/h4CYaEsMjxfbxzNnz4cOXr66s2bdqkzp07Z95u3LhhzvPcc8+patWqqY0bN6rff/9dhYeHq/DwcPP+9PR01bhxY9WlSxe1d+9etW7dOhUQEKAmT55sznP8+HHl6empJkyYoA4fPqw++eQT5erqqtatW2dVOWWUmSgqJkzI/2iuGHrpEnZwj4IMh40e8/S0Lp/BoG0rVmhb1tFmoaFaelycdeeLi3P2uySEKCps+f52akAEWNwWLVpkznPz5k01YsQIVaFCBeXp6akeeeQRde7cOd15Tpw4obp166bKlSunKlWqpMaNG6du376tyxMXF6eaN2+u3N3dVc2aNXXXyIsERKIoSE3N57B2/s2W2JUfHRYImbbRoy0HODkFRaZh9TkNqU9P185lMFg3NF8IIWz5/jYoZamLosgsJSUFX19fkpOTpT+RcJqyZSE11bZjJvN2ttmly3Ej2yzUjhAXp63+bjTCnDkwZoz1x+TEtBwF3OlIDXea1aRvjBAiM1u+v4tEp2ohRO4uXLAtGPLkOgqDLhgay/sYUIUSDAUE3BnS7eoKWWa9yFFe/X+ko7AQwlEcP9GIEMJmRqM2WurMGS0YmjLF+mMfYmW2leircCbbivWONHeufki3PScKjIrSFnzdulULoCpX1oIvGUIuhCgICYiEKGJiYrSFOXObudkSAxnspTlN2W9OW0p/BrLUziXM3YQJd5q1TEzD6s+c0Td1mRgM2n5rJwp0dc29aU0IIWwlAZEQRYhpkkVbNWY/+2mqS7uLX/mdu+xUsrz5+sKCBfDoo9n3mSYK7NNHC34s9f+RiQKFEM4kfYiEKCIuX85fMPQpz+qCoXiq40q6XYMhF5ecJ00Erc/Q+fOWgyET6f8jhCjKpIZIiCKgZk1twjxbVOICFwjUpfVnKcvob8eSaTIytJ851e58+im4u+d9npLU/8fUz6u434cQQiMBkRBO5uUFN27YdswwPuMzntOl+ZJECr52LJledLRWk5O5b1NIiNbUZUvtTkno/2Opn1dIiNYsKDVdQhRP0mQmhBN17WpbMFSGNK7irQuG3mYyBpRDgyHQanZkGYg7cyFl7fR+5oyWHhPjnHIJIQpGJma0gkzMKBxh2DCtE7K1Or
CJTXTSpdXmKMeoXeCyVKoEly7lPgIsPl6ahIxGbe21nEYAyrMSomiRiRmFKIJu3tSCoNBQ7YvT+mBIsZ4uumDoZzpjIMMuwVBoqDZvEOS8sKqMANNs3Zr7dAhKwalTWj4hRPEifYiEKAS9emkrutuqBsc5Ti1d2v1sII777VIug+FOHyBXV8v9YmztI1SS5TWTtq35hBBFhwREQjhYfoOh13mVV3nT/PoG5ajAFdLwyFc5KlbUhvabhIbqg52SNALMUew547YQomiRPkRWkD5EIr9u3gRPT9uOKU9Ktg7SI/iEeYwoUFl+/lkLbiTYyT9TH6K8ZtyWPkRCFA22fH9LDZEQDmRrMPQYX/M1/XRpgSRmm28oP1avhg8/zD2PI+bWKUnz9ciM20KUXNKpWgg7Mhph0yb4v//LfWbnrFww8g+1dMHQAp7GgLJLMATaF3VuQ8JjYrTaj06dYMAA7Wf16gUbRu6IczqbzLgtRMkkTWZWkCYzYY2YGHj+eTh71rbjWrKb3bTWpTVjL3/SzI6l04SGWm7OMc2tk/W3gSmoy88XvSPOWZSUpJovIUoqW76/JSCyggREIi9ffw39+uWdL6vPGcxgvjC/PkAjmvInyoGVt3Fx+pmiHTG3jszXI4QoCmQeIiEK0fPP2x4MBXMOhUEXDD1CDE04kO9gyJq1xCD7kHBHzK0j8/UIIYob6VQtRAEEB0Niom3HjGYWsxijS/PmKtfxzlcZmjaFDz7Q/h0RkXf+rEPCHTG3jszXI4QobiQgEiKfbA2GPLjFLcrp0l7ldd7k1XyXwWCA337TaoeMRq0ZKq8h4e3b69OPHrXuWrbMrSPz9QghihtpMhMiH154wbZgqAvrswVDYZwoUDAE8Nhjd5rKTEPCwfolOIxG65YQsRRI5aZ9e+2YnEbaGQxaB29bzimEEI4kAZEQNjAaYf16mDPH2iMU27iX9XQ1p6ziQQwoThJW4PI8+KD+ta1DwvPq62PyzDO2dX7OT3AmhBDOJAGREHkwGmHVKm1FeDc36No172MA6nIEhQv3ssOc1o6tPMwqu5XtwoXsaVFRcOKENpps6VLtZ3y85SHu1vbhqVPH9rLJfD1CiOJE+hAJkYuYGOjd2/bj3mMc4/jA/Poi/lTmHOmUsWPpICDAcrqrq35ofU4c3ddH1kcTQhQXEhAJkYP8BEN+XOEKFXVpT/F/LOIpO5bsjr/+0mbGzm+QYerrY2tHbFtYG5wJIYQzSZOZEBZcvmx7MDSI/2YLhipyyWHBEMCbbxZsOQzp6yOEEBoJiITIonZt8Pe3Pr8r6ZwjmP8yxJw2mxcwoLIFSI5y5oy2TEZ+giLp6yOEELJ0h1Vk6Y7S4eZNreP0jRvWH3MPO9lJW11aQw5ymIZ2Ll3eCrochqzNJYQoaWTpDiFs1KsXeHraFgx9S29dMLSLuzGQ4ZRgCAq+HIapr0///tpPCYaEEKWJdKoWpd6DD8Lq1dbnD+EUp6imS+vOGtbS3c4lyx9ZDkMIIWwnNUSiVDIaYcMGqFLFtmBoEtOzBUPluFFkgiGQ5TCEECI/JCASpU5MjNZpOiLC+toUT66jMDCdl8xp43kXAyrbkhyO5O8vy2EIIYQjSEAkShXT3ELJydYf8yCrsq1EX5XTvM94O5fuDhcL/zP9/eGp/43glyHyQghhXxIQiVIjLc3WuYUUe2jOKh42p3xFPwwozlI1l+Pyr3dvmDbN8iSJly/De+/B+PEyRF4IIexNOlWLUsHWWacbcYADNNGl3c0ufuNuO5fsDn9/be2xWrUsB0RKaTVBy5bBsWOwY4cMkRdCCHuRgEiUeF99BQMGWJ+/B6tZzZ1l5P+lGjU5TgaOjTjmz9eCnNxWnzcNrd+xQ5bDEEIIe5ImM1GitWtnfTBUjhvMZbguGBrAEqrzr8ODoZ49teYuazt5y9B6IYSwL6khEiXSzZvaRIvWasluljCQ+hwB4APG8BJvk0pZB5VQb9cubSoAR68+L4QQwj
KpIRIlSloaNGxofTDkgpFJTOcX7qE+RzhDFSKIZRwfFFowBHDhgjbDtGn1eWuG1huN2kr3X32l/TQaC624QghR4khAJEqMF14ADw84fNi6/GGcYBMdmc5LlCGd5fShCfvZQIRjC5qDc+f0q89nlXlo/cqV2gr3nTppTYIFWfFeCCGEBESihPD1hTlzrM2tGMiX7KMZ7dnGVbwZwmIe45tCW53eksBAraZn61YoXz77/ooVtaH1oK1sn7XzdUFWvBdCiNJO+hCJYi8oCFJSrMvrxxXmMZx+fA3AdtoyiC+Ip6YDS5g3d3cYMkQLanJy6RJkZMCYMbkPy4+OhocflmH4QghhC6khEsXWtWtaf5vz563L35E4/qQp/fiadFx5hTfowGanB0Og9X3KLRgCLdgZMcK6Yfn5XfFeCCFKK6khEsVS8+awb591ed1J5U1eYRzv44Lib+rwOF86dJJFR1BK63xtDRmWL4QQtpGASBQ7rq5a05E1GnKQJQykOVr09BnDGMf72dYmK2lkWL4QQthGmsxEsXHtmtZsZE0wZCCD5/mI3bSiOfu4QCUeYiXP8VmxD4YqVZIV74UQwt4kIBLFQqNGlkdeWRLMOX6kOx8xmrKk8iPdaMJ+fuAhxxbSwUzBzty5d15n3Q+y4r0QQuSHUwOiLVu28OCDD1KlShUMBgPff/+9br9Sitdee43KlStTrlw5IiIiOHr0qC7P5cuXGThwID4+Pvj5+TF06FCuXbumy/Pnn3/Svn17ypYtS2hoKDNnznT0rQk7uXlT+6I/dMi6/L34jv00oSvruUlZRvAJPVhDIsGOLWgeQkKyr1Bvi8zBzqOPasPvZcV7IYSwH6cGRNevX6dZs2Z88sknFvfPnDmTjz76iE8//ZRdu3bh5eVFZGQkt27dMucZOHAgBw8eJDY2ltWrV7NlyxaGDRtm3p+SkkKXLl0ICwtj9+7dvPvuu0ydOpX58+c7/P5EwfTqZf2M015cYwFP8x1RVOISf9CClvzBPEYAObQvFaIPP4SPPtICm5yau3KTNdiJioITJyAuDpYu1X7Gx0swJIQQ+aaKCEB999135tcZGRkqODhYvfvuu+a0pKQk5eHhob766iullFKHDh1SgPrtt9/MedauXasMBoM6c+aMUkqpuXPnqgoVKqjU1FRznhdffFHVq1fP6rIlJycrQCUnJ+f39oSNatVSShtXlffWhp3qKNoBRgxqOi+qMqRafXxhbHFx2n2tWKFUSIj1x/XsqR2bnu7Md0MIIYonW76/i2wfovj4eBISEoiIuLOMgq+vL23atGHnzp0A7Ny5Ez8/P1q3bm3OExERgYuLC7t27TLnue+++3B3dzfniYyM5MiRI1y5csXitVNTU0lJSdFtovAYDHDsWN75XEnnNaaxjXbU5hj/Uo1OxDGZGdzGPe8TFCLTMPioKFi82Prjdu7UOkhLnyAhhHCsIhsQJSQkABAUFKRLDwoKMu9LSEggMDBQt9/NzY2KFSvq8lg6R+ZrZDV9+nR8fX3NW2hoaMFvSOQpLc365qSaHGMr7ZnGVNwwsoQBNGMfW+jg2ELmU+Zh8NZOJAna7NSbNtm9OEIIIbIosgGRM02ePJnk5GTzdurUKWcXqcTr3VtbmDVviif5D/toRji/kIQvA1jC4ywhGT8HlzJ/XF2hTZs7K9MnJtp2vAREQgjheEV2YsbgYG1UUGJiIpUz/XmdmJhI8+bNzXnOZ/lzOz09ncuXL5uPDw4OJjHLN5DptSlPVh4eHnhY9+0s7MDaWqGKXGI+w+iNtnrpJjowhM85SZgDS1dwRiOEhelnmXZ11dKFEEIUDUW2hqhGjRoEBwezYcMGc1pKSgq7du0iPDwcgPDwcJKSkti9e7c5z8aNG8nIyKBNmzbmPFu2bOH27dvmPLGxsdSrV48KFSoU0t0ISxISrA+GHuAn9tOE3sSQRhkm8g6d2VDkgyGTrEtu2BIMdexo16IIIYSwwKkB0bVr19i7dy
979+4FtI7Ue/fu5eTJkxgMBqKjo3nzzTdZtWoV+/fvZ/DgwVSpUoVevXoB0KBBA7p27cozzzzDr7/+yvbt2xk1ahT9+vWjSpUqAAwYMAB3d3eGDh3KwYMH+frrr5k9ezZjx4510l0LgHLlrFteoiw3mcVofiKSKpzjMPW5h194l4lkUPJ7Gvv7S0AkhBCFohBGveUoLi5OAdm2IUOGKKW0ofevvvqqCgoKUh4eHqpz587qyJEjunNcunRJ9e/fX3l7eysfHx/15JNPqqtXr+ry7Nu3T7Vr1055eHioqlWrqhkzZthUThl2bz/p6dYPOW/KXrWfRuaEOYxU5bju9CH0hbmtWOHsd0wIIYovW76/DUop5cR4rFhISUnB19eX5ORkfHx8nF2cYmv5cnjssbzzGchgDB/yNi/hQRoJBPEU/2Et3R1fSCeIjtaezZkzd9JCQmD2bJloUQghCsKW7+8i26lalBxGIzz8MKxZk3feEE7xOUO4nzgAVvIQT7OQiwQ4uJTO8/DD8N57sHWrNl9R5coy95AQQhQ2CYiEQy1ZAo8/bl3eR/mGz3iWCiRxHU+imcVCnqYoLL3hCAaDVhNkCn6kr5AQQjiPBETCYWrVguPH887nQzJzeJ7BfAHAr9zF43zJUeo6uITOIyvTCyFE0VJkh92L4q1iReuCoXvZxj6aMZgvMOLC67zKvWwv0cEQyMr0QghR1EgNkbCrtDTrZpx24zZTmcokZuBKBsepwSC+YAf3Or6QTvLKK9CwofQREkKIokgCImE3Y8ZoTUB5qcsRvuRx7uJ3ABbxBKOZzVVK9gi+zp2ln5AQQhRVEhCJArt5Ezw9rcmpGMZ8PmAsXtzgMhUYxnxW0MfRRSyQV1+F+++HlSutC/iyytx5WgghRNEkAZEokAcegJ9/zjtfAOf5P4byIKsB+JnODOFzzlLVwSUsGH9/mDLlziiwtm2hb19t2kRrSOdpIYQoHqRTtcg3g8G6YKg7a9hPEx5kNam4M4YP6MJPTg2GXFzghRdg2jTtPnJaU+3TT/WBzKOPagGStaTztBBCFA9SQyRsZjSCmxWfnHLc4D3GM4J5AOynMQNYygGaOLiEefv6a+jzv5a6xo1h9Gg4fTp7vjFjtOApc0DzyiswZw5cupTz+f39tWt07Cg1Q0IIURxIDZGwybJl1gVDLdnNH7Q0B0MfMIa7+K1IBEMVK8Ijj9x5HRUFH3xgOe/p09C7N8TE3ElzdYX58y3XKplqm+bP1zpRSzAkhBDFgwREwipGIwQGQv/+uedzwcgkpvML91CfI5yhChHEMo4PSKVs4RQ2D5cvw1tv3XltNMLw4bkfM2yYli+zihWz5ytfXqttqlgxe34hhBBFlwREIk8LFmi1Qhcu5J4vjBPE0YnpvEQZ0vmW3jTlTzYQUTgFtcGUKXdqfTZtyr35C7T9mzZp/46J0ZrbLB2TkqJ1oO7UCapX19csCSGEKLokIBK5Mhi02pHcKQbyJftoxn1s5SrePMEiHmU5l/EvjGLmS3S0VotjCnTysmmTln/0aOtGmZ05owVOEhQJIUTRJwGRyFFOI68y8+MKSxnAlwzClxS205Zm7ONznqCoL8p66pS2wrwttm613PnaElPQZAq8hBBCFF0SEIlsEhKsC4Y6Esc+mtGfZaTjyqu8Tgc2E09NxxfSTs6ds3726I4dtfy2UCp/gZcQQojCJQGR0HF11dbayo07qbzDRDbQmWqc4ii1acsO3uRVjMVsJofKlbVAxz+Plj1/fy1fXs8mJ7YGUkIIIQqXBETCzGCAjIzc8zTgEL9wDxN5FxcU83mGFuzhN+4unELaicEAoaF3FlmdPz/3/PPna/nat9cmW7SmBi2z/AZSQgghCocERIKbN635gleMYg67aUUL9nKBSjzM9zzLfK7jXRjFtBtLy2lERcGKFVA1y+TZISFaumliRldXmD1bf568rmUKvIQQQhRdEhCVcqGheS/MGsw51tKNObxAOW6xlq
40YT+reLhwClkATz6pBTWZ5bScRlQU/PsvxMXB0qXazxMnLOf79tvswVNWso6ZEEIUHwalrF2msvRKSUnB19eX5ORkfHx8nF0cu7GmhuNhvmchT1OJS9ykLBN4l08YSVEfQQZav5/ERO3fmzbdGV7fsWP2JTWMRq3j87lzWvOWqSktN5mPOXpUm68p8wi00FAtGJJ1zIQQwjls+f4uXj1ghV3cvJl3rZAX1/iQMTzDQgD20JyBLOEwDQuhhPbxwgtaUBMTo1+r7M03tVqi2bO1YCXrftDvz4mrq36E2ssv2x5UCSGEKBqkhsgKJamGqE4d+Oef3PPczS6WMJDaHCMDA+8ygVd5g9u4F04h7cBUO7RypTY5YtZPual2bPx4ePfdnM+Tuf+QEEKI4sWW728JiKxQUgKivJrIXEnnJd7mNV7HDSMnCWUw/2UzHQulfPlhMFieNXrFCnj4YW35jJwmUjQ9j9z+B5gCK0s1PflpZhNCCFF4bPn+lk7VpcCZM3kHQzU5xlba8zpTcMPIUvrTlD+LZDDk7a0FPJZGhYWG3qnVyWtWaaXyXoIj8xpmmcXEaMFWp04wYICsXSaEEMWd9CEq4Vxc8vrSVzzBYj7iBcpzjWR8GM48vmJAYRXRZmXLarU/rq7az5xqaew1GeKmTdC5853XpsVdsz5X09pllkawCSGEKNokICrB8qoVqsgl5jOM3mjVGpu5j8H8l5OEFULp8u/iRS0IMo0Uy2npDUdMhpjb4q5Kac88OvpOwCaEEKJ4kCazEig5Oe9gKIJY9tOE3sSQRhleZAb3s7HIB0Mm1tT+5HdWaUvnMbGmGU7WLhNCiOJHAqISpnx58PPLeb8Ht/iAMcTShSqc4zD1uYdfmMmLZODcKo3wcOvzWlP7Y+us0rmdx8TaZjhZu0wIIYoXCYhKEIMBrl3LeX8T/uR3WjOGWQB8wghasZs9tCycAuYhMlLr85SXkBDrl8LIaVbpgADry3X+/J1/W9sMJ2uXCSFE8SIBUQmQkJB7DYiBDMbwAb9xF405SCKBdGcNo/iEm+QxQ2Mhmjo178VlQav1saV/TlSUtgRH5iU5Tp8Ga2dQyBzc5NUMJ2uXCSFE8SQBUTFnMOReG1GV08TyAB8wDg/SWMWDNGE/a+leeIW0k4oV7TdRoqsrLFyYd76swU1uzXCydpkQQhRfEhAVY3n1i+nDcv6kKZ3ZyHU8GcZnPMxKLhBYOAW0s2++yV8wlNOcQa6uMGFCzscZDJaDm5ya4XJaNFYIIUTRJwFRMZRXE1l5UljMEJbzGBW5wq/cRQv2sIBhFLVFWW3p7Jy5L4+1THMGZR0ZZpoz6J57YPny7H2KQkNzD24sNcPFx0swJIQQxZUs3WGForR0R5kykJ6e8/572cYXDKIGJzDiwtu8xOu8RjplCq+QNggNhaefhilT8s4bF5fznEOWGI15L90REqIFMiDLcAghREkjq92XQEYjuOXybpXjBlOZyjjex5UM4qnO43zJDu4tvELaoFcvbYJDU/+cBQu0WhtL4bkpcLG1o7ItcwZ17GhbsCWEEKJkkSazYuCLL3IPhnqwmht4MZF3cSWDxQyhGfuKbDAEMGLEnZmmHdVRWeYMEkIIYS0JiIo4Pz8YPDinvYrfaM1qHjSnDOMznmQxV3Fu015ess435IiOyjJnkBBCCGtJk1kRlluH44Yc5CCNdWlt+IVfaePgUtmHpQ7SUVG5L9ZqK9OcQfZuihNCCFHySEBUBN28CZ65zJc4h1GM4hPz61OEUIN4jMXo7cypVia3xVptZWqK69NHC34yB0UyZ5AQQojMpMmsiHnwwZyDoYpcQmHQBUOP8wXVOFWsgqHCnMlZ5gwSQghhjeLzLVoK1Kx5Zwh4VkNZyEKe0aX5cYVk/BxfMDsr7FoZezfFCSGEKHkkICoCdu+G1q0t73PjNucJpAJJ5rSZTOBFZhZO4ezI3x/mz3dOrYw9m+KEEEKUPBIQOVluHafbsZWt3K
dLq8sRjlLXwaWyr7JlYfJkePllqZURQghRNElA5ES5BUOr6UEPfjS/3kQHOhFHUVt6IzcGAzz2GCxZIoGQEEKIok0CIifJKRgK4wQnqKFLe4Cf+JkHCqFUtitXThsVZ+LjA23bQmSkNvmiu7t9rmM0Oq4PkCPPXVzIMygceT1na9+H0v5+lfb7Fw6iSpGPP/5YhYWFKQ8PD3X33XerXbt2WXVccnKyAlRycrJdyqENAM++vcZUXUIqZZQ7t3LMX1S2gACloqOViotTKj3dLo9IZ8UKpUJC9NcMCdHSi/K5iwt5BoUjr+ds7ftQ2t+v0n7/wja2fH+XmoBo2bJlyt3dXf3nP/9RBw8eVM8884zy8/NTiYmJeR5rz4Dor7+yBxTepGRLHMVHTg90rN0MBm1zxC+kFSu0czvimo48d3Ehz6Bw5PWcJ0yw7n0o7e9Xab9/YTtbvr9LzWr3bdq04a677uLjjz8GICMjg9DQUJ5//nkmTZqU67H2XO3e1RUyMu68jmIFK+ijyxNEAucJKtB1ClvmlePt2ZRl7Wr1tl7TkecuLuQZFI68njNoz9dotLzP9D788w/UqlV63y/5vIr8sOX7u1RMzJiWlsbu3buJiIgwp7m4uBAREcHOnTuz5U9NTSUlJUW32UvmYMiN27pgaDFDMKCKXTAE2t9pppXj7cWW1eqL0rmLC3kGhSOv5ww5B0Nw532YO7d0v1/yeRWOVioCoosXL2I0GgkK0gcaQUFBJCQkZMs/ffp0fH19zVtoaKhDypWOG1/RD4AW/MGTLHbIdQqTPVeOd+Rq9Y48d3Ehz6Bw2Ov5HTtWuNcrauTzKhytVAREtpo8eTLJycnm7dSpU3Y7dw3dADIDA/gKA4q9tLDbNZzJnivHO3K1ekeeu7iQZ1A47PX8atUq3OsVNfJ5FY5WKgKiSpUq4erqSmJioi49MTGR4ODgbPk9PDzw8fHRbfby++92O1WRYjDYf40y02r1OU1RUJBrOvLcxYU8g8KR13MGrc9LXu/DiBGl+/2Sz6twtFIRELm7u9OqVSs2bNhgTsvIyGDDhg2Eh4cXalkqVoSg4tdFCIBx47RfOll/ITlq5XjTavWZr2Gvazry3MWFPIPCkddzNhhg7Nic94P2Pri7l+73Sz6vwuEcPuatiFi2bJny8PBQixcvVocOHVLDhg1Tfn5+KiEhIc9j7T0PkVJKBQXlPpQ9NdXyfBuZNze3nIfB23NYfWho7nOlZN7vCI68pjPup6iRZ1A48nrO1r4Ppf39Ku33L2wjw+5z8PHHH/Puu++SkJBA8+bN+eijj2jTpk2ex9lz2H1mly/DXXfB8ePaaxcXOHQI6tW7kyfzjKyBgVra+fN3Zmc1GmHOHNi2Dby9YdAgbRHTHTvuHJOWBkuXwrVrcO+9UL8+fPUV/PuvNox1yBDo0EF/TNbr5Gc2XXuSmaodS55B4ZCZqu2jtN+/sJ4t39+lKiDKL0cFREIIIYRwHJmHSAghhBDCBhIQCSGEEKLUk4BICCGEEKWeBERCCCGEKPUkIBJCCCFEqScBkRBCCCFKPQmIhBBCCFHqSUAkhBBCiFJPAiIhhBBClHpuzi5AcWCazDslJcXJJRFCCCGEtUzf29YsyiEBkRWuXr0KQGhoqJNLIoQQQghbXb16FV9f31zzyFpmVsjIyODs2bOUL18eg8Fgt/OmpKQQGhrKqVOnZI00K8jzso08L9vI87KNPC/byPOyjb2el1KKq1evUqVKFVxccu8lJDVEVnBxcSEkJMRh5/fx8ZH/IDaQ52UbeV62kedlG3letpHnZRt7PK+8aoZMpFO1EEIIIUo9CYiEEEIIUepJQOREHh4eTJkyBQ8PD2cXpViQ52UbeV62kedlG3letpHnZRtnPC/pVC2EEEKIUk9qiIQQQghR6klAJIQQQohSTwIiIYQQQpR6EhAJIYQQotSTgMhJPvnkE6pXr07ZsmVp06YNv/76q7OLVCi2bNnCgw8+SJUqVT
AYDHz//fe6/UopXnvtNSpXrky5cuWIiIjg6NGjujyXL19m4MCB+Pj44Ofnx9ChQ7l27Zouz59//kn79u0pW7YsoaGhzJw509G3ZnfTp0/nrrvuonz58gQGBtKrVy+OHDmiy3Pr1i1GjhyJv78/3t7e9O7dm8TERF2ekydP0qNHDzw9PQkMDGTChAmkp6fr8mzatImWLVvi4eFB7dq1Wbx4saNvz+7mzZtH06ZNzRO5hYeHs3btWvN+eVa5mzFjBgaDgejoaHOaPLM7pk6disFg0G3169c375dnld2ZM2d4/PHH8ff3p1y5cjRp0oTff//dvL/I/b5XotAtW7ZMubu7q//85z/q4MGD6plnnlF+fn4qMTHR2UVzuB9//FG9/PLLKiYmRgHqu+++0+2fMWOG8vX1Vd9//73at2+feuihh1SNGjXUzZs3zXm6du2qmjVrpn755Re1detWVbt2bdW/f3/z/uTkZBUUFKQGDhyoDhw4oL766itVrlw59dlnnxXWbdpFZGSkWrRokTpw4IDau3ev6t69u6pWrZq6du2aOc9zzz2nQkND1YYNG9Tvv/+u7rnnHtW2bVvz/vT0dNW4cWMVERGh9uzZo3788UdVqVIlNXnyZHOe48ePK09PTzV27Fh16NAhNWfOHOXq6qrWrVtXqPdbUKtWrVJr1qxRf//9tzpy5Ih66aWXVJkyZdSBAweUUvKscvPrr7+q6tWrq6ZNm6rRo0eb0+WZ3TFlyhTVqFEjde7cOfN24cIF8355VnqXL19WYWFh6oknnlC7du1Sx48fV+vXr1f//POPOU9R+30vAZET3H333WrkyJHm10ajUVWpUkVNnz7diaUqfFkDooyMDBUcHKzeffddc1pSUpLy8PBQX331lVJKqUOHDilA/fbbb+Y8a9euVQaDQZ05c0YppdTcuXNVhQoVVGpqqjnPiy++qOrVq+fgO3Ks8+fPK0Bt3rxZKaU9mzJlyqjly5eb8xw+fFgBaufOnUopLQB1cXFRCQkJ5jzz5s1TPj4+5uczceJE1ahRI921+vbtqyIjIx19Sw5XoUIFtXDhQnlWubh69aqqU6eOio2NVR06dDAHRPLM9KZMmaKaNWtmcZ88q+xefPFF1a5duxz3F8Xf99JkVsjS0tLYvXs3ERER5jQXFxciIiLYuXOnE0vmfPHx8SQkJOieja+vL23atDE/m507d+Ln50fr1q3NeSIiInBxcWHXrl3mPPfddx/u7u7mPJGRkRw5coQrV64U0t3YX3JyMgAVK1YEYPfu3dy+fVv3vOrXr0+1atV0z6tJkyYEBQWZ80RGRpKSksLBgwfNeTKfw5SnOH8ejUYjy5Yt4/r164SHh8uzysXIkSPp0aNHtvuSZ5bd0aNHqVKlCjVr1mTgwIGcPHkSkGdlyapVq2jdujWPPvoogYGBtGjRggULFpj3F8Xf9xIQFbKLFy9iNBp1/ykAgoKCSEhIcFKpigbT/ef2bBISEggMDNTtd3Nzo2LFiro8ls6R+RrFTUZGBtHR0dx77700btwY0O7F3d0dPz8/Xd6szyuvZ5FTnpSUFG7evOmI23GY/fv34+3tjYeHB8899xzfffcdDRs2lGeVg2XLlvHHH38wffr0bPvkmem1adOGxYsXs27dOubNm0d8fDzt27fn6tWr8qwsOH78OPPmzaNOnTqsX7+e4cOH88ILL/D5558DRfP3vax2L0QxMHLkSA4cOMC2bducXZQirV69euzdu5fk5GS+/fZbhgwZwubNm51drCLp1KlTjB49mtjYWMqWLevs4hR53bp1M/+7adOmtGnThrCwML755hvKlSvnxJIVTRkZGbRu3Zq3334bgBYtWnDgwAE+/fRThgwZ4uTSWSY1RIWsUqVKuLq6Zht9kJiYSHBwsJNKVTSY7j+3ZxMcHMz58+d1+9PT07l8+bIuj6VzZL5GcTJq1ChWr15NXFwcISEh5vTg4GDS0tJISkrS5c/6vPJ6Fjnl8fHxKX
a/6N3d3alduzatWrVi+vTpNGvWjNmzZ8uzsmD37t2cP3+eli1b4ubmhpubG5s3b+ajjz7Czc2NoKAgeWa58PPzo27duvzzzz/y+bKgcuXKNGzYUJfWoEEDczNjUfx9LwFRIXN3d6dVq1Zs2LDBnJaRkcGGDRsIDw93Ysmcr0aNGgQHB+ueTUpKCrt27TI/m/DwcJKSkti9e7c5z8aNG8nIyKBNmzbmPFu2bOH27dvmPLGxsdSrV48KFSoU0t0UnFKKUaNG8d1337Fx40Zq1Kih29+qVSvKlCmje15Hjhzh5MmTuue1f/9+3S+V2NhYfHx8zL+swsPDdecw5SkJn8eMjAxSU1PlWVnQuXNn9u/fz969e81b69atGThwoPnf8sxydu3aNY4dO0blypXl82XBvffem22akL///puwsDCgiP6+t7kbtiiwZcuWKQ8PD7V48WJ16NAhNWzYMOXn56cbfVBSXb16Ve3Zs0ft2bNHAeqDDz5Qe/bsUf/++69SShuG6efnp1auXKn+/PNP9fDDD1schtmiRQu1a9cutW3bNlWnTh3dMMykpCQVFBSkBg0apA4cOKCWLVumPD09i92w++HDhytfX1+1adMm3VDfGzdumPM899xzqlq1amrjxo3q999/V+Hh4So8PNy83zTUt0uXLmrv3r1q3bp1KiAgwOJQ3wkTJqjDhw+rTz75pFgO9Z00aZLavHmzio+PV3/++aeaNGmSMhgM6qefflJKybOyRuZRZkrJM8ts3LhxatOmTSo+Pl5t375dRUREqEqVKqnz588rpeRZZfXrr78qNzc39dZbb6mjR4+qJUuWKE9PT/Xll1+a8xS13/cSEDnJnDlzVLVq1ZS7u7u6++671S+//OLsIhWKuLg4BWTbhgwZopTShmK++uqrKigoSHl4eKjOnTurI0eO6M5x6dIl1b9/f+Xt7a18fHzUk08+qa5evarLs2/fPtWuXTvl4eGhqlatqmbMmFFYt2g3lp4ToBYtWmTOc/PmTTVixAhVoUIF5enpqR555BF17tw53XlOnDihunXrpsqVK6cqVaqkxo0bp27fvq3LExcXp5o3b67c3d1VzZo1ddcoLp566ikVFham3N3dVUBAgOrcubM5GFJKnpU1sgZE8szu6Nu3r6pcubJyd3dXVatWVX379tXNqSPPKrsffvhBNW7cWHl4eKj69eur+fPn6/YXtd/3BqWUsq1OSQghhBCiZJE+REIIIYQo9SQgEkIIIUSpJwGREEIIIUo9CYiEEEIIUepJQCSEEEKIUk8CIiGEEEKUehIQCSGEEKLUk4BICCGEEKWeBERCCKfo2LEj0dHRDjt/9erVmTVrlsPObw1H36MQwn7cnF0AIYRwhN9++w0vLy+nliEmJoYyZcqYX1evXp3o6GgJkoQogiQgEkKUSAEBAU67dlpaGu7u7lSsWNFpZRBC2EaazIQQRUJqairjx4+natWqeHl50aZNGzZt2mTe/++///Lggw9SoUIFvLy8aNSoET/++GOO58vaZGYwGFi4cCGPPPIInp6e1KlTh1WrVpn3X7lyhYEDBxIQEEC5cuWoU6cOixYtMu8/ffo0/fv3p2LFinh5edG6dWt27doFwNSpU2nevDkLFy6kRo0alC1bFtA3mXXs2JF///2XMWPGYDAYMBgM+bovIYRjSA2REKJIGDVqFIcOHWLZsmVUqVKF7777jq5du7J//37q1KnDyJEjSUtLY8uWLXh5eXHo0CG8vb1tusa0adOYOXMm7777LnPmzGHgwIH8+++/VKxYkVdffZVDhw6xdu1aKlWqxD///MPNmzcBuHbtGh06dKBq1aqsWrWK4OBg/vjjDzIyMszn/ueff1ixYgUxMTG4urpmu3ZMTAzNmjVj2LBhPPPMM+Z0e9yXEKLgJCASQjjdyZMnWbRoESdPnqRKlSoAjB8/nnXr1rFo0SLefvttTp48Se/evWnSpAkANWvWtPk6TzzxBP379wfg7bff5q
OPPuLXX3+la9eunDx5khYtWtC6dWtAq2EyWbp0KRcuXOC3334zN4PVrl1bd+60tDT++9//5thUV7FiRVxdXSlfvjzBwcG6ey/ofQkhCk4CIiGE0+3fvx+j0UjdunV16ampqfj7+wPwwgsvMHz4cH766SciIiLo3bs3TZs2tek6mfN7eXnh4+PD+fPnARg+fDi9e/fmjz/+oEuXLvTq1Yu2bdsCsHfvXlq0aJFrn6CwsLB89Vuyx30JIQpO+hAJIZzu2rVruLq6snv3bvbu3WveDh8+zOzZswF4+umnOX78OIMGDWL//v20bt2aOXPm2HSdzCO+QOtXZGr26tatm7mPz9mzZ+ncuTPjx48HoFy5cnmeO78j2uxxX0KIgpOASAjhdC1atMBoNHL+/Hlq166t2zI3L4WGhvLcc88RExPDuHHjWLBggV3LERAQwJAhQ/jyyy+ZNWsW8+fPB7Sapb1793L58uUCnd/d3R2j0Zgt3dH3JYTImwREQginq1u3LgMHDmTw4MHExMQQHx/Pr7/+yvTp01mzZg0A0dHRrF+/nvj4eP744w/i4uJo0KCB3crw2muvsXLlSv755x8OHjzI6tWrzefv378/wcHB9OrVi+3bt3P8+HFWrFjBzp07bbpG9erV2bJlC2fOnOHixYuFcl9CCOtIQCSEKBIWLVrE4MGDGTduHPXq1aNXr1789ttvVKtWDQCj0cjIkSNp0KABXbt2pW7dusydO9du13d3d2fy5Mk0bdqU++67D1dXV5YtW2be99NPPxEYGEj37t1p0qQJM2bMsDiaLDevv/46J06coFatWub+Ro6+LyGEdQxKKeXsQgghhBBCOJPUEAkhhBCi1JOASAghhBClngREQgghhCj1JCASQgghRKknAZEQQgghSj0JiIQQQghR6klAJIQQQohSTwIiIYQQQpR6EhAJIYQQotSTgEgIIYQQpZ4EREIIIYQo9f4fhG2aSffhw8cAAAAASUVORK5CYII=",
|
| 1069 |
+
"text/plain": [
|
| 1070 |
+
"<Figure size 640x480 with 1 Axes>"
|
| 1071 |
+
]
|
| 1072 |
+
},
|
| 1073 |
+
"metadata": {},
|
| 1074 |
+
"output_type": "display_data"
|
| 1075 |
+
}
|
| 1076 |
+
],
|
| 1077 |
+
"source": [
|
| 1078 |
+
"#interpretation\n",
|
| 1079 |
+
"fig,ax = plt.subplots()\n",
|
| 1080 |
+
"ax.scatter(x_test,y_test,color='blue')\n",
|
| 1081 |
+
"ax.plot(x_test,predict,color='red')\n",
|
| 1082 |
+
"plt.title(\"Prédiction de la Présence (Attendency) (R2 = 0.98)\")\n",
|
| 1083 |
+
"plt.xlabel(\"les inscrits\")\n",
|
| 1084 |
+
"plt.ylabel(\"les presents\")\n",
|
| 1085 |
+
"plt.show()\n"
|
| 1086 |
+
]
|
| 1087 |
+
},
|
| 1088 |
+
{
|
| 1089 |
+
"cell_type": "code",
|
| 1090 |
+
"execution_count": 22,
|
| 1091 |
+
"id": "b962144b-ab7e-42e3-ba8e-2b31953d64ca",
|
| 1092 |
+
"metadata": {},
|
| 1093 |
+
"outputs": [
|
| 1094 |
+
{
|
| 1095 |
+
"name": "stdout",
|
| 1096 |
+
"output_type": "stream",
|
| 1097 |
+
"text": [
|
| 1098 |
+
"avec MAE : 28.234713006519993\n",
|
| 1099 |
+
"avec R2 : 0.9824798790918774\n"
|
| 1100 |
+
]
|
| 1101 |
+
}
|
| 1102 |
+
],
|
| 1103 |
+
"source": [
|
| 1104 |
+
"#mesure performance\n",
|
| 1105 |
+
"print ('avec MAE :', mean_absolute_error(y_test,predict))\n",
|
| 1106 |
+
"print ('avec R2 :', r2_score(y_test,predict))\n",
|
| 1107 |
+
"#score r2 moyen, améliorons ça avec les dates"
|
| 1108 |
+
]
|
| 1109 |
+
},
|
| 1110 |
+
{
|
| 1111 |
+
"cell_type": "code",
|
| 1112 |
+
"execution_count": null,
|
| 1113 |
+
"id": "4f3a0ef8-9b7b-4cc1-a332-18bf1b8d136c",
|
| 1114 |
+
"metadata": {},
|
| 1115 |
+
"outputs": [],
|
| 1116 |
+
"source": []
|
| 1117 |
+
}
|
| 1118 |
+
],
|
| 1119 |
+
"metadata": {
|
| 1120 |
+
"kernelspec": {
|
| 1121 |
+
"display_name": "Python 3.10 (ml-env)",
|
| 1122 |
+
"language": "python",
|
| 1123 |
+
"name": "ml-env"
|
| 1124 |
+
},
|
| 1125 |
+
"language_info": {
|
| 1126 |
+
"codemirror_mode": {
|
| 1127 |
+
"name": "ipython",
|
| 1128 |
+
"version": 3
|
| 1129 |
+
},
|
| 1130 |
+
"file_extension": ".py",
|
| 1131 |
+
"mimetype": "text/x-python",
|
| 1132 |
+
"name": "python",
|
| 1133 |
+
"nbconvert_exporter": "python",
|
| 1134 |
+
"pygments_lexer": "ipython3",
|
| 1135 |
+
"version": "3.11.9"
|
| 1136 |
+
}
|
| 1137 |
+
},
|
| 1138 |
+
"nbformat": 4,
|
| 1139 |
+
"nbformat_minor": 5
|
| 1140 |
+
}
|