sahlnizar commited on
Commit
5da71f2
·
verified ·
1 Parent(s): 62db04f

Add files using upload-large-folder tool

Browse files
Files changed (49) hide show
  1. .gitattributes +35 -35
  2. course_feedback_nlp/Coursera_courses.csv +624 -0
  3. course_feedback_nlp/Untitled.ipynb +418 -0
  4. course_feedback_nlp/evaluate.py +548 -0
  5. course_feedback_nlp/requirements.txt +7 -0
  6. course_feedback_nlp/test.py +52 -0
  7. course_feedback_nlp/train.py +862 -0
  8. course_feedback_nlp/train_3_classes.py +872 -0
  9. dropout_binaryclass/correlation.py +218 -0
  10. dropout_binaryclass/data.csv +0 -0
  11. dropout_binaryclass/feature_importance.png +0 -0
  12. dropout_binaryclass/feature_selection_recommendations.txt +42 -0
  13. dropout_binaryclass/model_config.json +411 -0
  14. dropout_binaryclass/predict_students_dropout_and_academic_success_model.pkl +0 -0
  15. dropout_binaryclass/redundant_feature_pairs.csv +16 -0
  16. dropout_binaryclass/target_correlations.csv +37 -0
  17. dropout_binaryclass/train.ipynb +0 -0
  18. dropout_binaryclass/train.py +224 -0
  19. grade_multiclass/02_grade_distribution.png +0 -0
  20. grade_multiclass/03_performance_index_distribution.png +0 -0
  21. grade_multiclass/04_features_by_grade.png +0 -0
  22. grade_multiclass/05_extracurricular_analysis.png +0 -0
  23. grade_multiclass/06_correlation_heatmap.png +0 -0
  24. grade_multiclass/09_feature_importance.png +0 -0
  25. grade_multiclass/10_learning_curves.png +0 -0
  26. grade_multiclass/11_model_comparison.png +0 -0
  27. grade_multiclass/Student_Performance.csv +0 -0
  28. grade_multiclass/correlation_heatmap.png +0 -0
  29. grade_multiclass/feature_importance.png +0 -0
  30. grade_multiclass/features_by_grade.png +0 -0
  31. grade_multiclass/learning_curves.png +0 -0
  32. grade_multiclass/model_comparison.png +0 -0
  33. grade_multiclass/student_performance_classification.ipynb +0 -0
  34. grade_multiclass/student_performance_classification.py +1100 -0
  35. grade_multiclass/target_distribution.png +0 -0
  36. lr_attendance/2018-2019_Daily_Attendance_20240429.csv +0 -0
  37. lr_attendance/add_weather_features.py +195 -0
  38. lr_attendance/best_model_coefficients.csv +13 -0
  39. lr_attendance/explore_data.py +28 -0
  40. lr_attendance/feature_engineering.py +154 -0
  41. lr_attendance/feature_info.json +118 -0
  42. lr_attendance/final_coefficients.csv +13 -0
  43. lr_attendance/final_predictions.csv +0 -0
  44. lr_attendance/improved_predictions.csv +0 -0
  45. lr_attendance/model_comparison.csv +5 -0
  46. lr_attendance/model_summary.csv +2 -0
  47. lr_attendance/nyc_weather_2018_2019.csv +297 -0
  48. lr_attendance/prepare_for_modeling.py +215 -0
  49. lr_attendance/train.ipynb +1140 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
course_feedback_nlp/Coursera_courses.csv ADDED
@@ -0,0 +1,624 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,institution,course_url,course_id
2
+ Machine Learning,Stanford University,https://www.coursera.org/learn/machine-learning,machine-learning
3
+ Indigenous Canada,University of Alberta,https://www.coursera.org/learn/indigenous-canada,indigenous-canada
4
+ The Science of Well-Being,Yale University,https://www.coursera.org/learn/the-science-of-well-being,the-science-of-well-being
5
+ Technical Support Fundamentals,Google,https://www.coursera.org/learn/technical-support-fundamentals,technical-support-fundamentals
6
+ Become a CBRS Certified Professional Installer by Google,Google - Spectrum Sharing,https://www.coursera.org/learn/google-cbrs-cpi-training,google-cbrs-cpi-training
7
+ Financial Markets,Yale University,https://www.coursera.org/learn/financial-markets-global,financial-markets-global
8
+ Introduction to Psychology,Yale University,https://www.coursera.org/learn/introduction-psychology,introduction-psychology
9
+ Programming for Everybody (Getting Started with Python),University of Michigan,https://www.coursera.org/learn/python,python
10
+ The Bits and Bytes of Computer Networking,Google,https://www.coursera.org/learn/computer-networking,computer-networking
11
+ AI For Everyone,DeepLearning.AI,https://www.coursera.org/learn/ai-for-everyone,ai-for-everyone
12
+ Crash Course on Python,Google,https://www.coursera.org/learn/python-crash-course,python-crash-course
13
+ Psychological First Aid,Johns Hopkins University,https://www.coursera.org/learn/psychological-first-aid,psychological-first-aid
14
+ Neural Networks and Deep Learning,DeepLearning.AI,https://www.coursera.org/learn/neural-networks-deep-learning,neural-networks-deep-learning
15
+ What is Data Science?,IBM,https://www.coursera.org/learn/what-is-datascience,what-is-datascience
16
+ Successful Negotiation: Essential Strategies and Skills,University of Michigan,https://www.coursera.org/learn/negotiation-skills,negotiation-skills
17
+ Fundamentals of Project Planning and Management,University of Virginia,https://www.coursera.org/learn/uva-darden-project-management,uva-darden-project-management
18
+ Project Launch,"University of California, Irvine",https://www.coursera.org/learn/project-management,project-management
19
+ "Brand Management: Aligning Business, Brand and Behaviour",London Business School,https://www.coursera.org/learn/brand,brand
20
+ Writing in the Sciences,Stanford University,https://www.coursera.org/learn/sciwrite,sciwrite
21
+ Stanford Introduction to Food and Health,Stanford University,https://www.coursera.org/learn/food-and-health,food-and-health
22
+ "HTML, CSS, and Javascript for Web Developers",Johns Hopkins University,https://www.coursera.org/learn/html-css-javascript-for-web-developers,html-css-javascript-for-web-developers
23
+ Excel Skills for Business: Essentials,Macquarie University,https://www.coursera.org/learn/excel-essentials,excel-essentials
24
+ Introduction to Negotiation: A Strategic Playbook for Becoming a Principled and Persuasive Negotiator,Yale University,https://www.coursera.org/learn/negotiation,negotiation
25
+ "Everyday Excel, Part 1",University of Colorado Boulder,https://www.coursera.org/learn/everyday-excel-part-1,everyday-excel-part-1
26
+ Learning How to Learn: Powerful mental tools to help you master tough subjects,University of California San Diego,https://www.coursera.org/learn/learning-how-to-learn,learning-how-to-learn
27
+ Google Cloud Platform Fundamentals: Core Infrastructure,Google Cloud,https://www.coursera.org/learn/gcp-fundamentals,gcp-fundamentals
28
+ Viral Marketing and How to Craft Contagious Content,University of Pennsylvania,https://www.coursera.org/learn/wharton-contagious-viral-marketing,wharton-contagious-viral-marketing
29
+ Python Data Structures,University of Michigan,https://www.coursera.org/learn/python-data,python-data
30
+ Private Equity and Venture Capital,Università Bocconi,https://www.coursera.org/learn/private-equity,private-equity
31
+ First Step Korean,Yonsei University,https://www.coursera.org/learn/learn-korean,learn-korean
32
+ "Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning",DeepLearning.AI,https://www.coursera.org/learn/introduction-tensorflow,introduction-tensorflow
33
+ Operating Systems and You: Becoming a Power User,Google,https://www.coursera.org/learn/os-power-user,os-power-user
34
+ Tools for Data Science,IBM,https://www.coursera.org/learn/open-source-tools-for-data-science,open-source-tools-for-data-science
35
+ "Improving Deep Neural Networks: Hyperparameter tuning, Regularization and Optimization",DeepLearning.AI,https://www.coursera.org/learn/deep-neural-network,deep-neural-network
36
+ Diversity and inclusion in the workplace,ESSEC Business School,https://www.coursera.org/learn/diversity-inclusion-workplace,diversity-inclusion-workplace
37
+ Design and Interpretation of Clinical Trials,Johns Hopkins University,https://www.coursera.org/learn/clinical-trials,clinical-trials
38
+ Visual Elements of User Interface Design,California Institute of the Arts,https://www.coursera.org/learn/visual-elements-user-interface-design,visual-elements-user-interface-design
39
+ Management of Fashion and Luxury Companies,Università Bocconi,https://www.coursera.org/learn/mafash,mafash
40
+ Primeros Auxilios Psicológicos (PAP),Universitat Autònoma de Barcelona,https://www.coursera.org/learn/pap,pap
41
+ Social Psychology,Wesleyan University,https://www.coursera.org/learn/social-psychology,social-psychology
42
+ Initiating and Planning Projects,"University of California, Irvine",https://www.coursera.org/learn/project-planning,project-planning
43
+ Computational Thinking for Problem Solving,University of Pennsylvania,https://www.coursera.org/learn/computational-thinking-problem-solving,computational-thinking-problem-solving
44
+ Agile with Atlassian Jira,Atlassian,https://www.coursera.org/learn/agile-atlassian-jira,agile-atlassian-jira
45
+ Fundamentals of Graphic Design,California Institute of the Arts,https://www.coursera.org/learn/fundamentals-of-graphic-design,fundamentals-of-graphic-design
46
+ Introduction to User Experience Design,Georgia Institute of Technology,https://www.coursera.org/learn/user-experience-design,user-experience-design
47
+ Introduction to Marketing,University of Pennsylvania,https://www.coursera.org/learn/wharton-marketing,wharton-marketing
48
+ Python for Data Science and AI,IBM,https://www.coursera.org/learn/python-for-applied-data-science-ai,python-for-applied-data-science-ai
49
+ Marketing Analytics,University of Virginia,https://www.coursera.org/learn/uva-darden-market-analytics,uva-darden-market-analytics
50
+ Natural Language Processing with Classification and Vector Spaces,DeepLearning.AI,https://www.coursera.org/learn/classification-vector-spaces-in-nlp,classification-vector-spaces-in-nlp
51
+ Fundamentals of Quantitative Modeling,University of Pennsylvania,https://www.coursera.org/learn/wharton-quantitative-modeling,wharton-quantitative-modeling
52
+ How to Manage a Remote Team,GitLab,https://www.coursera.org/learn/remote-team-management,remote-team-management
53
+ Mathematics for Machine Learning: Linear Algebra,Imperial College London,https://www.coursera.org/learn/linear-algebra-machine-learning,linear-algebra-machine-learning
54
+ Introduction to Data Science in Python,University of Michigan,https://www.coursera.org/learn/python-data-analysis,python-data-analysis
55
+ Customer Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-customer-analytics,wharton-customer-analytics
56
+ Introduction to Psychology,University of Toronto,https://www.coursera.org/learn/introduction-psych,introduction-psych
57
+ English for Career Development,University of Pennsylvania,https://www.coursera.org/learn/careerdevelopment,careerdevelopment
58
+ Global Diplomacy – Diplomacy in the Modern World,University of London,https://www.coursera.org/learn/global-diplomacy,global-diplomacy
59
+ Game Theory,Stanford University,https://www.coursera.org/learn/game-theory-1,game-theory-1
60
+ SQL for Data Science,"University of California, Davis",https://www.coursera.org/learn/sql-for-data-science,sql-for-data-science
61
+ Write Professional Emails in English,Georgia Institute of Technology,https://www.coursera.org/learn/professional-emails-english,professional-emails-english
62
+ Medical Neuroscience,Duke University,https://www.coursera.org/learn/medical-neuroscience,medical-neuroscience
63
+ System Administration and IT Infrastructure Services,Google,https://www.coursera.org/learn/system-administration-it-infrastructure-services,system-administration-it-infrastructure-services
64
+ International Women's Health and Human Rights,Stanford University,https://www.coursera.org/learn/womens-health-human-rights,womens-health-human-rights
65
+ Child Nutrition and Cooking,Stanford University,https://www.coursera.org/learn/childnutrition,childnutrition
66
+ Understanding the Brain: The Neurobiology of Everyday Life,The University of Chicago,https://www.coursera.org/learn/neurobiology,neurobiology
67
+ Introduction to Social Media Marketing,Facebook,https://www.coursera.org/learn/social-media-marketing-introduction,social-media-marketing-introduction
68
+ Forensic Accounting and Fraud Examination,West Virginia University,https://www.coursera.org/learn/forensic-accounting,forensic-accounting
69
+ Clinical Terminology for International and U.S. Students,University of Pittsburgh,https://www.coursera.org/learn/clinical-terminology,clinical-terminology
70
+ Science of Exercise,University of Colorado Boulder,https://www.coursera.org/learn/science-exercise,science-exercise
71
+ Digital Product Management: Modern Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-digital-product-management,uva-darden-digital-product-management
72
+ Data Science Math Skills,Duke University,https://www.coursera.org/learn/datasciencemathskills,datasciencemathskills
73
+ Structuring Machine Learning Projects,DeepLearning.AI,https://www.coursera.org/learn/machine-learning-projects,machine-learning-projects
74
+ An Introduction to American Law,University of Pennsylvania,https://www.coursera.org/learn/american-law,american-law
75
+ The Strategy of Content Marketing,"University of California, Davis",https://www.coursera.org/learn/content-marketing,content-marketing
76
+ Introduction to Cybersecurity Tools & Cyber Attacks,IBM,https://www.coursera.org/learn/introduction-cybersecurity-cyber-attacks,introduction-cybersecurity-cyber-attacks
77
+ The Data Scientist’s Toolbox,Johns Hopkins University,https://www.coursera.org/learn/data-scientists-tools,data-scientists-tools
78
+ Animal Behaviour and Welfare,The University of Edinburgh,https://www.coursera.org/learn/animal-welfare,animal-welfare
79
+ Convolutional Neural Networks in TensorFlow,DeepLearning.AI,https://www.coursera.org/learn/convolutional-neural-networks-tensorflow,convolutional-neural-networks-tensorflow
80
+ Positive Psychology: Martin E. P. Seligman’s Visionary Science,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-visionary-science,positive-psychology-visionary-science
81
+ Introduction to the Biology of Cancer,Johns Hopkins University,https://www.coursera.org/learn/cancer,cancer
82
+ Convolutional Neural Networks,DeepLearning.AI,https://www.coursera.org/learn/convolutional-neural-networks,convolutional-neural-networks
83
+ Using Python to Access Web Data,University of Michigan,https://www.coursera.org/learn/python-network-data,python-network-data
84
+ Introductory Human Physiology,Duke University,https://www.coursera.org/learn/physiology,physiology
85
+ Introduction to Systematic Review and Meta-Analysis,Johns Hopkins University,https://www.coursera.org/learn/systematic-review,systematic-review
86
+ Organizational Analysis,Stanford University,https://www.coursera.org/learn/organizational-analysis,organizational-analysis
87
+ Communication Strategies for a Virtual Age,University of Toronto,https://www.coursera.org/learn/communication-strategies-virtual-age,communication-strategies-virtual-age
88
+ Moral Foundations of Politics,Yale University,https://www.coursera.org/learn/moral-politics,moral-politics
89
+ Étudier en France: French Intermediate course B1-B2,École Polytechnique,https://www.coursera.org/learn/etudier-en-france,etudier-en-france
90
+ Managing the Company of the Future,London Business School,https://www.coursera.org/learn/company-future-management,company-future-management
91
+ Finance for Non-Finance Professionals,Rice University,https://www.coursera.org/learn/finance-for-non-finance,finance-for-non-finance
92
+ Site Reliability Engineering: Measuring and Managing Reliability,Google Cloud,https://www.coursera.org/learn/site-reliability-engineering-slos,site-reliability-engineering-slos
93
+ Autism Spectrum Disorder,"University of California, Davis",https://www.coursera.org/learn/autism-spectrum-disorder,autism-spectrum-disorder
94
+ Data Science Methodology,IBM,https://www.coursera.org/learn/data-science-methodology,data-science-methodology
95
+ Introduction to Financial Accounting,University of Pennsylvania,https://www.coursera.org/learn/wharton-accounting,wharton-accounting
96
+ Marketing in a Digital World,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-digital,marketing-digital
97
+ Wind Energy,Technical University of Denmark (DTU),https://www.coursera.org/learn/wind-energy,wind-energy
98
+ Principles of Sustainable Finance,Erasmus University Rotterdam,https://www.coursera.org/learn/sustainable-finance,sustainable-finance
99
+ Financial Engineering and Risk Management Part I,Columbia University,https://www.coursera.org/learn/financial-engineering-1,financial-engineering-1
100
+ Introduction to Philosophy,The University of Edinburgh,https://www.coursera.org/learn/philosophy,philosophy
101
+ Business Metrics for Data-Driven Companies,Duke University,https://www.coursera.org/learn/analytics-business-metrics,analytics-business-metrics
102
+ Python Basics,University of Michigan,https://www.coursera.org/learn/python-basics,python-basics
103
+ Introduction to Sustainability,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/sustainability,sustainability
104
+ Positive Psychiatry and Mental Health,The University of Sydney,https://www.coursera.org/learn/positive-psychiatry,positive-psychiatry
105
+ Cryptography I,Stanford University,https://www.coursera.org/learn/crypto,crypto
106
+ Learning to Teach Online,UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/teach-online,teach-online
107
+ IT Security: Defense against the digital dark arts,Google,https://www.coursera.org/learn/it-security,it-security
108
+ Entreprise et changement climatique,ESSEC Business School,https://www.coursera.org/learn/entreprise-changement-climatique,entreprise-changement-climatique
109
+ An Introduction to Consumer Neuroscience & Neuromarketing,Copenhagen Business School,https://www.coursera.org/learn/neuromarketing,neuromarketing
110
+ Gamification,University of Pennsylvania,https://www.coursera.org/learn/gamification,gamification
111
+ "Divide and Conquer, Sorting and Searching, and Randomized Algorithms",Stanford University,https://www.coursera.org/learn/algorithms-divide-conquer,algorithms-divide-conquer
112
+ Contabilidad para no contadores,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/contabilidad,contabilidad
113
+ Using Python to Interact with the Operating System,Google,https://www.coursera.org/learn/python-operating-system,python-operating-system
114
+ Object-Oriented Data Structures in C++,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/cs-fundamentals-1,cs-fundamentals-1
115
+ Google Cloud Platform Big Data and Machine Learning Fundamentals,Google Cloud,https://www.coursera.org/learn/gcp-big-data-ml-fundamentals,gcp-big-data-ml-fundamentals
116
+ Databases and SQL for Data Science,IBM,https://www.coursera.org/learn/sql-data-science,sql-data-science
117
+ Natural Language Processing in TensorFlow,DeepLearning.AI,https://www.coursera.org/learn/natural-language-processing-tensorflow,natural-language-processing-tensorflow
118
+ "Advanced Valuation and Strategy - M&A, Private Equity, and Venture Capital",Erasmus University Rotterdam,https://www.coursera.org/learn/advanced-valuation-and-strategy,advanced-valuation-and-strategy
119
+ Natural Language Processing with Probabilistic Models,DeepLearning.AI,https://www.coursera.org/learn/probabilistic-models-in-nlp,probabilistic-models-in-nlp
120
+ Vital Signs: Understanding What the Body Is Telling Us,University of Pennsylvania,https://www.coursera.org/learn/vital-signs,vital-signs
121
+ Understanding Research Methods,University of London,https://www.coursera.org/learn/research-methods,research-methods
122
+ IBM Customer Engagement Specialist Professional Certificate,IBM,https://www.coursera.org/learn/ibm-customer-engagement-specialist,ibm-customer-engagement-specialist
123
+ Introduction to Calculus,The University of Sydney,https://www.coursera.org/learn/introduction-to-calculus,introduction-to-calculus
124
+ Camino a la Excelencia en Gestión de Proyectos,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/camino-excelencia-gestion-proyectos,camino-excelencia-gestion-proyectos
125
+ Introduction to HTML5,University of Michigan,https://www.coursera.org/learn/html,html
126
+ Wine Tasting: Sensory Techniques for Wine Analysis,"University of California, Davis",https://www.coursera.org/learn/wine,wine
127
+ Excel Skills for Business: Intermediate I,Macquarie University,https://www.coursera.org/learn/excel-intermediate-1,excel-intermediate-1
128
+ "Programming Foundations with JavaScript, HTML and CSS",Duke University,https://www.coursera.org/learn/duke-programming-web,duke-programming-web
129
+ Build a Modern Computer from First Principles: From Nand to Tetris (Project-Centered Course),Hebrew University of Jerusalem,https://www.coursera.org/learn/build-a-computer,build-a-computer
130
+ Food & Beverage Management,Università Bocconi,https://www.coursera.org/learn/food-beverage-management,food-beverage-management
131
+ Data Analysis with Python,IBM,https://www.coursera.org/learn/data-analysis-with-python,data-analysis-with-python
132
+ Project Planning,"University of California, Irvine",https://www.coursera.org/learn/project-planning-1,project-planning-1
133
+ Agile Meets Design Thinking,University of Virginia,https://www.coursera.org/learn/uva-darden-getting-started-agile,uva-darden-getting-started-agile
134
+ AWS Fundamentals: Going Cloud-Native,Amazon Web Services,https://www.coursera.org/learn/aws-fundamentals-going-cloud-native,aws-fundamentals-going-cloud-native
135
+ Construction Project Management,Columbia University,https://www.coursera.org/learn/construction-project-management,construction-project-management
136
+ Introduction to Mathematical Thinking,Stanford University,https://www.coursera.org/learn/mathematical-thinking,mathematical-thinking
137
+ Everyday Parenting: The ABCs of Child Rearing,Yale University,https://www.coursera.org/learn/everyday-parenting,everyday-parenting
138
+ Introduction to Healthcare,Stanford University,https://www.coursera.org/learn/intro-to-healthcare,intro-to-healthcare
139
+ Machine Learning with Python,IBM,https://www.coursera.org/learn/machine-learning-with-python,machine-learning-with-python
140
+ Terrorism and Counterterrorism: Comparing Theory and Practice,Universiteit Leiden,https://www.coursera.org/learn/terrorism,terrorism
141
+ Data Management for Clinical Research,Vanderbilt University,https://www.coursera.org/learn/clinical-data-management,clinical-data-management
142
+ Sustainable Fashion,Copenhagen Business School,https://www.coursera.org/learn/sustainable-fashion,sustainable-fashion
143
+ Foundations of Data Science: K-Means Clustering in Python,University of London,https://www.coursera.org/learn/data-science-k-means-clustering-python,data-science-k-means-clustering-python
144
+ Instructional Design Foundations and Applications,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/instructional-design-foundations-applications,instructional-design-foundations-applications
145
+ Cursos en línea: modelo para armar,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/enlinea,enlinea
146
+ Modern Art & Ideas,The Museum of Modern Art,https://www.coursera.org/learn/modern-art-ideas,modern-art-ideas
147
+ "Speak English Professionally: In Person, Online & On the Phone",Georgia Institute of Technology,https://www.coursera.org/learn/speak-english-professionally,speak-english-professionally
148
+ Essential Google Cloud Infrastructure: Foundation,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-foundation,gcp-infrastructure-foundation
149
+ Introduction to Artificial Intelligence (AI),IBM,https://www.coursera.org/learn/introduction-to-ai,introduction-to-ai
150
+ Dog Emotion and Cognition,Duke University,https://www.coursera.org/learn/dog-emotion-and-cognition,dog-emotion-and-cognition
151
+ International Leadership and Organizational Behavior,Università Bocconi,https://www.coursera.org/learn/organizational-behavior,organizational-behavior
152
+ Driving business towards the Sustainable Development Goals,Erasmus University Rotterdam,https://www.coursera.org/learn/sdgbusiness,sdgbusiness
153
+ "The Sustainable Development Goals – A global, transdisciplinary vision for the future",University of Copenhagen,https://www.coursera.org/learn/global-sustainable-development,global-sustainable-development
154
+ Digital Transformation,BCG,https://www.coursera.org/learn/bcg-uva-darden-digital-transformation,bcg-uva-darden-digital-transformation
155
+ Sequence Models,DeepLearning.AI,https://www.coursera.org/learn/nlp-sequence-models,nlp-sequence-models
156
+ Devenir entrepreneur du changement,HEC Paris,https://www.coursera.org/learn/entrepreneur-changement,entrepreneur-changement
157
+ Seeing Through Photographs,The Museum of Modern Art,https://www.coursera.org/learn/photography,photography
158
+ Entrepreneurship 1: Developing the Opportunity,University of Pennsylvania,https://www.coursera.org/learn/wharton-entrepreneurship-opportunity,wharton-entrepreneurship-opportunity
159
+ Introduction to Search Engine Optimization,"University of California, Davis",https://www.coursera.org/learn/search-engine-optimization,search-engine-optimization
160
+ Learn to Speak Korean 1,Yonsei University,https://www.coursera.org/learn/learn-speak-korean1,learn-speak-korean1
161
+ Circular Economy - Sustainable Materials Management,Delft University of Technology,https://www.coursera.org/learn/circular-economy,circular-economy
162
+ Drug Development,University of California San Diego,https://www.coursera.org/learn/drug-development,drug-development
163
+ R Programming,Johns Hopkins University,https://www.coursera.org/learn/r-programming,r-programming
164
+ Economics of Money and Banking,Columbia University,https://www.coursera.org/learn/money-banking,money-banking
165
+ Chinese for Beginners,Peking University,https://www.coursera.org/learn/learn-chinese,learn-chinese
166
+ Grammar and Punctuation,"University of California, Irvine",https://www.coursera.org/learn/grammar-punctuation,grammar-punctuation
167
+ Japanese for beginners 1,Saint Petersburg State University,https://www.coursera.org/learn/japanese-1,japanese-1
168
+ Introduction to English Common Law,University of London,https://www.coursera.org/learn/intro-common-law,intro-common-law
169
+ Introduction to Dental Medicine,University of Pennsylvania,https://www.coursera.org/learn/dental-medicine-penn,dental-medicine-penn
170
+ Fundamentals of Reinforcement Learning,Alberta Machine Intelligence Institute,https://www.coursera.org/learn/fundamentals-of-reinforcement-learning,fundamentals-of-reinforcement-learning
171
+ The Power of Macroeconomics: Economic Principles in the Real World,"University of California, Irvine",https://www.coursera.org/learn/principles-of-macroeconomics,principles-of-macroeconomics
172
+ Corporate Sustainability. Understanding and Seizing the Strategic Opportunity,Università Bocconi,https://www.coursera.org/learn/corp-sustainability,corp-sustainability
173
+ Behavioral Finance,Duke University,https://www.coursera.org/learn/duke-behavioral-finance,duke-behavioral-finance
174
+ "Sequences, Time Series and Prediction",DeepLearning.AI,https://www.coursera.org/learn/tensorflow-sequences-time-series-and-prediction,tensorflow-sequences-time-series-and-prediction
175
+ Supply Chain Logistics,Rutgers the State University of New Jersey,https://www.coursera.org/learn/supply-chain-logistics,supply-chain-logistics
176
+ Project Execution,"University of California, Irvine",https://www.coursera.org/learn/project-execution,project-execution
177
+ Nutrición y obesidad: control de sobrepeso,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/nutricion-obesidad-sobrepeso,nutricion-obesidad-sobrepeso
178
+ Microeconomics Principles,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/microeconomics,microeconomics
179
+ Creative Writing: The Craft of Plot,Wesleyan University,https://www.coursera.org/learn/craft-of-plot,craft-of-plot
180
+ Astronomy: Exploring Time and Space,University of Arizona,https://www.coursera.org/learn/astro,astro
181
+ Oil & Gas Industry Operations and Markets,Duke University,https://www.coursera.org/learn/oilandgas,oilandgas
182
+ Design Thinking for Innovation,University of Virginia,https://www.coursera.org/learn/uva-darden-design-thinking-innovation,uva-darden-design-thinking-innovation
183
+ EDIVET: Do you have what it takes to be a veterinarian?,The University of Edinburgh,https://www.coursera.org/learn/becoming-a-veterinarian,becoming-a-veterinarian
184
+ Learn to Program: The Fundamentals,University of Toronto,https://www.coursera.org/learn/learn-to-program,learn-to-program
185
+ Financial Accounting Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-financial-accounting,uva-darden-financial-accounting
186
+ Finding Purpose and Meaning In Life: Living for What Matters Most,University of Michigan,https://www.coursera.org/learn/finding-purpose-and-meaning-in-life,finding-purpose-and-meaning-in-life
187
+ Understanding Clinical Research: Behind the Statistics,University of Cape Town,https://www.coursera.org/learn/clinical-research,clinical-research
188
+ Epidemiology: The Basic Science of Public Health,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/epidemiology,epidemiology
189
+ Fashion as Design,The Museum of Modern Art,https://www.coursera.org/learn/fashion-design,fashion-design
190
+ Teamwork Skills: Communicating Effectively in Groups,University of Colorado Boulder,https://www.coursera.org/learn/teamwork-skills-effective-communication,teamwork-skills-effective-communication
191
+ Feminism and Social Justice,"University of California, Santa Cruz",https://www.coursera.org/learn/feminism-social-justice,feminism-social-justice
192
+ International Organizations Management,University of Geneva,https://www.coursera.org/learn/international-organizations-management,international-organizations-management
193
+ Marketing Digital,Universidade de São Paulo,https://www.coursera.org/learn/estrategia-marketing-digital,estrategia-marketing-digital
194
+ Fundamentals of GIS,"University of California, Davis",https://www.coursera.org/learn/gis,gis
195
+ e-Learning Ecologies: Innovative Approaches to Teaching and Learning for the Digital Age,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/elearning,elearning
196
+ "Excel/VBA for Creative Problem Solving, Part 1",University of Colorado Boulder,https://www.coursera.org/learn/excel-vba-for-creative-problem-solving-part-1,excel-vba-for-creative-problem-solving-part-1
197
+ Rethinking International Tax Law,Universiteit Leiden,https://www.coursera.org/learn/international-taxation,international-taxation
198
+ Introduction to Probability and Data with R,Duke University,https://www.coursera.org/learn/probability-intro,probability-intro
199
+ Understanding and Visualizing Data with Python,University of Michigan,https://www.coursera.org/learn/understanding-visualization-data,understanding-visualization-data
200
+ Fundamentals of Visualization with Tableau,"University of California, Davis",https://www.coursera.org/learn/data-visualization-tableau,data-visualization-tableau
201
+ Getting Started with SAS Programming,SAS,https://www.coursera.org/learn/sas-programming-basics,sas-programming-basics
202
+ Machine Learning for All,University of London,https://www.coursera.org/learn/uol-machine-learning-for-all,uol-machine-learning-for-all
203
+ Using Databases with Python,University of Michigan,https://www.coursera.org/learn/python-databases,python-databases
204
+ Addiction Treatment: Clinical Skills for Healthcare Providers,Yale University,https://www.coursera.org/learn/addiction-treatment,addiction-treatment
205
+ Dino 101: Dinosaur Paleobiology,University of Alberta,https://www.coursera.org/learn/dino101,dino101
206
+ Sports Marketing,Northwestern University,https://www.coursera.org/learn/sports-marketing,sports-marketing
207
+ Positive Psychology,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/positive-psychology,positive-psychology
208
+ Introduction to Programming with MATLAB,Vanderbilt University,https://www.coursera.org/learn/matlab,matlab
209
+ Preparing to Manage Human Resources,University of Minnesota,https://www.coursera.org/learn/managing-human-resources,managing-human-resources
210
+ Solar Energy Basics,The State University of New York,https://www.coursera.org/learn/solar-energy-basics,solar-energy-basics
211
+ Front-End Web UI Frameworks and Tools: Bootstrap 4,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/bootstrap-4,bootstrap-4
212
+ Building Scalable Java Microservices with Spring Boot and Spring Cloud,Google Cloud,https://www.coursera.org/learn/google-cloud-java-spring,google-cloud-java-spring
213
+ Introduction to Forensic Science,"Nanyang Technological University, Singapore",https://www.coursera.org/learn/forensic-science,forensic-science
214
+ Google Cloud Product Fundamentals,Google Cloud,https://www.coursera.org/learn/google-cloud-product-fundamentals,google-cloud-product-fundamentals
215
+ American Contract Law I,Yale University,https://www.coursera.org/learn/contracts-1,contracts-1
216
+ Engineering Health: Introduction to Yoga and Physiology,New York University,https://www.coursera.org/learn/engineering-health-yoga-physiology,engineering-health-yoga-physiology
217
+ AI for Medical Diagnosis,DeepLearning.AI,https://www.coursera.org/learn/ai-for-medical-diagnosis,ai-for-medical-diagnosis
218
+ Natural Language Processing with Sequence Models,DeepLearning.AI,https://www.coursera.org/learn/sequence-models-in-nlp,sequence-models-in-nlp
219
+ Introduction to Electronics,Georgia Institute of Technology,https://www.coursera.org/learn/electronics,electronics
220
+ International Humanitarian Law in Theory and Practice,Universiteit Leiden,https://www.coursera.org/learn/international-humanitarian-law,international-humanitarian-law
221
+ Making Architecture,IE School of Architecture & Design,https://www.coursera.org/learn/making-architecture,making-architecture
222
+ Model Thinking,University of Michigan,https://www.coursera.org/learn/model-thinking,model-thinking
223
+ Supporting children with difficulties in reading and writing,University of London,https://www.coursera.org/learn/dyslexia-difficulties,dyslexia-difficulties
224
+ Innovation Management,Erasmus University Rotterdam,https://www.coursera.org/learn/innovation-management,innovation-management
225
+ The Manager's Toolkit: A Practical Guide to Managing People at Work,"Birkbeck, University of London",https://www.coursera.org/learn/people-management,people-management
226
+ "The Modern World, Part One: Global History from 1760 to 1910",University of Virginia,https://www.coursera.org/learn/modern-world,modern-world
227
+ Fundamentals of Music Theory,The University of Edinburgh,https://www.coursera.org/learn/edinburgh-music-theory,edinburgh-music-theory
228
+ Supply Chain Principles,Georgia Institute of Technology,https://www.coursera.org/learn/supply-chain-principles,supply-chain-principles
229
+ Essential Google Cloud Infrastructure: Core Services,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-core-services,gcp-infrastructure-core-services
230
+ Weight Management: Beyond Balancing Calories,Emory University,https://www.coursera.org/learn/weight-management-beyond-balancing-calories,weight-management-beyond-balancing-calories
231
+ Miracles of Human Language: An Introduction to Linguistics,Universiteit Leiden,https://www.coursera.org/learn/human-language,human-language
232
+ Java Programming: Solving Problems with Software,Duke University,https://www.coursera.org/learn/java-programming,java-programming
233
+ Race and Cultural Diversity in American Life and History,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/race-cultural-diversity-american-life,race-cultural-diversity-american-life
234
+ Inspiring and Motivating Individuals,University of Michigan,https://www.coursera.org/learn/motivate-people-teams,motivate-people-teams
235
+ "Competencias digitales. Herramientas de ofimática (Microsoft Word, Excel, Power Point)",Universitat Autònoma de Barcelona,https://www.coursera.org/learn/competencias-digitales-ofimatica,competencias-digitales-ofimatica
236
+ Healing with the Arts,University of Florida,https://www.coursera.org/learn/healing-with-the-arts,healing-with-the-arts
237
+ People Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-people-analytics,wharton-people-analytics
238
+ What is Social?,Northwestern University,https://www.coursera.org/learn/what-is-social,what-is-social
239
+ UX Design Fundamentals,California Institute of the Arts,https://www.coursera.org/learn/ux-design-fundamentals,ux-design-fundamentals
240
+ Creative Thinking: Techniques and Tools for Success,Imperial College London,https://www.coursera.org/learn/creative-thinking-techniques-and-tools-for-success,creative-thinking-techniques-and-tools-for-success
241
+ Introduction to Classical Music,Yale University,https://www.coursera.org/learn/introclassicalmusic,introclassicalmusic
242
+ Children's Human Rights - An Interdisciplinary Introduction,University of Geneva,https://www.coursera.org/learn/childrens-rights,childrens-rights
243
+ Investment Management in an Evolving and Volatile World by HEC Paris and AXA Investment Managers,HEC Paris,https://www.coursera.org/learn/investment-management,investment-management
244
+ Introduction to Data Analysis Using Excel,Rice University,https://www.coursera.org/learn/excel-data-analysis,excel-data-analysis
245
+ Mind Control: Managing Your Mental Health During COVID-19,University of Toronto,https://www.coursera.org/learn/manage-health-covid-19,manage-health-covid-19
246
+ Introduction to International Criminal Law,Case Western Reserve University,https://www.coursera.org/learn/international-criminal-law,international-criminal-law
247
+ "FinTech: Foundations, Payments, and Regulations",University of Pennsylvania,https://www.coursera.org/learn/wharton-fintech-overview-payments-regulations,wharton-fintech-overview-payments-regulations
248
+ Greek and Roman Mythology,University of Pennsylvania,https://www.coursera.org/learn/mythology,mythology
249
+ Politics and Economics of International Energy,Sciences Po,https://www.coursera.org/learn/global-energy,global-energy
250
+ Continuous Delivery & DevOps,University of Virginia,https://www.coursera.org/learn/uva-darden-continous-delivery-devops,uva-darden-continous-delivery-devops
251
+ Teach English Now! Foundational Principles,Arizona State University,https://www.coursera.org/learn/english-principles,english-principles
252
+ Business Model Innovation,HEC Paris,https://www.coursera.org/learn/business-model,business-model
253
+ Introduction to User Experience Principles and Processes,University of Michigan,https://www.coursera.org/learn/introtoux-principles-and-processes,introtoux-principles-and-processes
254
+ Beyond the Sustainable Development Goals (SDGs): Addressing Sustainability and Development,University of Michigan,https://www.coursera.org/learn/beyond-the-sustainable-development-goals-addressing-sustainability-and-development,beyond-the-sustainable-development-goals-addressing-sustainability-and-development
255
+ Process Mining: Data science in Action,Eindhoven University of Technology,https://www.coursera.org/learn/process-mining,process-mining
256
+ Fundamentals of Immunology: Innate Immunity and B-Cell Function,Rice University,https://www.coursera.org/learn/immunologyfundamentalsimmunitybcells,immunologyfundamentalsimmunitybcells
257
+ Introduction to Corporate Finance,University of Pennsylvania,https://www.coursera.org/learn/wharton-finance,wharton-finance
258
+ Global Diplomacy: the United Nations in the World,University of London,https://www.coursera.org/learn/global-diplomacy-un,global-diplomacy-un
259
+ Algorithmic Toolbox,University of California San Diego,https://www.coursera.org/learn/algorithmic-toolbox,algorithmic-toolbox
260
+ Troubles du spectre de l'autisme : diagnostic,University of Geneva,https://www.coursera.org/learn/troubles-spectre-autisme-diagnostic,troubles-spectre-autisme-diagnostic
261
+ Anatomy: Musculoskeletal and Integumentary Systems,University of Michigan,https://www.coursera.org/learn/anatomy403-1x,anatomy403-1x
262
+ Unraveling the Cycling City,University of Amsterdam,https://www.coursera.org/learn/unraveling-the-cycling-city,unraveling-the-cycling-city
263
+ A Crash Course in Causality: Inferring Causal Effects from Observational Data,University of Pennsylvania,https://www.coursera.org/learn/crash-course-in-causality,crash-course-in-causality
264
+ English for Business and Entrepreneurship,University of Pennsylvania,https://www.coursera.org/learn/business,business
265
+ Natural Language Processing with Attention Models,DeepLearning.AI,https://www.coursera.org/learn/attention-models-in-nlp,attention-models-in-nlp
266
+ What is Compliance?,University of Pennsylvania,https://www.coursera.org/learn/what-is-compliance,what-is-compliance
267
+ Getting Started with Google Sheets,Google Cloud,https://www.coursera.org/learn/getting-started-with-google-sheets,getting-started-with-google-sheets
268
+ Data Visualization with Python,IBM,https://www.coursera.org/learn/python-for-data-visualization,python-for-data-visualization
269
+ Foundations of Mindfulness,Rice University,https://www.coursera.org/learn/foundations-of-mindfulness,foundations-of-mindfulness
270
+ Negociación exitosa: Estrategias y habilidades esenciales (en español),University of Michigan,https://www.coursera.org/learn/negociacion,negociacion
271
+ Data-driven Decision Making,PwC,https://www.coursera.org/learn/decision-making,decision-making
272
+ Fundamentals of Engineering Exam Review,Georgia Institute of Technology,https://www.coursera.org/learn/fe-exam,fe-exam
273
+ Gender and Sexuality: Diversity and Inclusion in the Workplace,University of Pittsburgh,https://www.coursera.org/learn/gender-sexuality,gender-sexuality
274
+ Managerial Accounting Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-managerial-accounting,uva-darden-managerial-accounting
275
+ Search Engine Optimization Fundamentals,"University of California, Davis",https://www.coursera.org/learn/seo-fundamentals,seo-fundamentals
276
+ Essentials of Global Health,Yale University,https://www.coursera.org/learn/essentials-global-health,essentials-global-health
277
+ International Security Management,Erasmus University Rotterdam,https://www.coursera.org/learn/international-security-management,international-security-management
278
+ Getting Started with AWS Machine Learning,Amazon Web Services,https://www.coursera.org/learn/aws-machine-learning,aws-machine-learning
279
+ Arts and Heritage Management,Università Bocconi,https://www.coursera.org/learn/arts-heritage,arts-heritage
280
+ Understanding Einstein: The Special Theory of Relativity,Stanford University,https://www.coursera.org/learn/einstein-relativity,einstein-relativity
281
+ Réussir le Changement,ESSEC Business School,https://www.coursera.org/learn/reussir-le-changement,reussir-le-changement
282
+ Equine Welfare and Management,"University of California, Davis",https://www.coursera.org/learn/equine,equine
283
+ International migrations: a global issue,Sciences Po,https://www.coursera.org/learn/international-migrations,international-migrations
284
+ Introduction to Web Development,"University of California, Davis",https://www.coursera.org/learn/web-development,web-development
285
+ Writing and Editing: Word Choice and Word Order,University of Michigan,https://www.coursera.org/learn/writing-editing-words,writing-editing-words
286
+ Introduction to the Digital Advertising Landscape,University of Colorado Boulder,https://www.coursera.org/learn/digital-advertising-landscape,digital-advertising-landscape
287
+ Access Controls,(ISC)²,https://www.coursera.org/learn/access-control-sscp,access-control-sscp
288
+ Engineering Project Management: Initiating and Planning,Rice University,https://www.coursera.org/learn/initiating-planning,initiating-planning
289
+ Kotlin for Java Developers,JetBrains,https://www.coursera.org/learn/kotlin-for-java-developers,kotlin-for-java-developers
290
+ Mathematics for Machine Learning: Multivariate Calculus,Imperial College London,https://www.coursera.org/learn/multivariate-calculus-machine-learning,multivariate-calculus-machine-learning
291
+ Introduction to Git and GitHub,Google,https://www.coursera.org/learn/introduction-git-github,introduction-git-github
292
+ Industrial Biotechnology,University of Manchester ,https://www.coursera.org/learn/industrial-biotech,industrial-biotech
293
+ The Addicted Brain,Emory University,https://www.coursera.org/learn/addiction-and-the-brain,addiction-and-the-brain
294
+ Introducción a la programación en Python I: Aprendiendo a programar con Python,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/aprendiendo-programar-python,aprendiendo-programar-python
295
+ Modernizing Data Lakes and Data Warehouses with GCP,Google Cloud,https://www.coursera.org/learn/data-lakes-data-warehouses-gcp,data-lakes-data-warehouses-gcp
296
+ Drug Discovery,University of California San Diego,https://www.coursera.org/learn/drug-discovery,drug-discovery
297
+ Nutrition and Lifestyle in Pregnancy,Ludwig-Maximilians-Universität München (LMU),https://www.coursera.org/learn/nutrition-pregnancy,nutrition-pregnancy
298
+ Financial Acumen for Non-Financial Managers,University of Pennsylvania,https://www.coursera.org/learn/finance-healthcare-managers,finance-healthcare-managers
299
+ Python and Statistics for Financial Analysis,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/python-statistics-financial-analysis,python-statistics-financial-analysis
300
+ Bugs 101: Insect-Human Interactions,University of Alberta,https://www.coursera.org/learn/bugs-101,bugs-101
301
+ Autodesk Certified Professional: Revit for Architectural Design Exam Prep,Autodesk,https://www.coursera.org/learn/autodesk-revit-architectural-design,autodesk-revit-architectural-design
302
+ "Leading for Equity, Diversity and Inclusion in Higher Education",University of Michigan,https://www.coursera.org/learn/leading-for-equity-diversity-inclusion,leading-for-equity-diversity-inclusion
303
+ Digital Media and Marketing Strategies,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-plan,marketing-plan
304
+ Enterprise Architecture,Peter the Great St. Petersburg Polytechnic University,https://www.coursera.org/learn/enterprise-architecture,enterprise-architecture
305
+ Introduction to Spreadsheets and Models,University of Pennsylvania,https://www.coursera.org/learn/wharton-introduction-spreadsheets-models,wharton-introduction-spreadsheets-models
306
+ The Arts and Science of Relationships: Understanding Human Needs,University of Toronto,https://www.coursera.org/learn/human-needs,human-needs
307
+ Essentials in Clinical Simulations Across the Health Professions,The George Washington University,https://www.coursera.org/learn/clinicalsimulations,clinicalsimulations
308
+ Budgeting and Scheduling Projects,"University of California, Irvine",https://www.coursera.org/learn/schedule-projects,schedule-projects
309
+ Machine Learning for Business Professionals,Google Cloud,https://www.coursera.org/learn/machine-learning-business-professionals,machine-learning-business-professionals
310
+ Introduction to Accounting Data Analytics and Visualization,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/intro-accounting-data-analytics-visual,intro-accounting-data-analytics-visual
311
+ Spanish Vocabulary: Meeting People,"University of California, Davis",https://www.coursera.org/learn/spanish-vocabulary-meeting-people,spanish-vocabulary-meeting-people
312
+ Gestión Empresarial Exitosa para Pymes,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/gestionempresarialpyme,gestionempresarialpyme
313
+ Public Policy Challenges of the 21st Century,University of Virginia,https://www.coursera.org/learn/public-policy,public-policy
314
+ International Law in Action: the Arbitration of International Disputes,Universiteit Leiden,https://www.coursera.org/learn/arbitration-international-disputes,arbitration-international-disputes
315
+ Introduction to Ancient Egypt and Its Civilization,University of Pennsylvania,https://www.coursera.org/learn/introancientegypt,introancientegypt
316
+ Financing and Investing in Infrastructure,Università Bocconi,https://www.coursera.org/learn/infrastructure-investing,infrastructure-investing
317
+ Global Environmental Management,Technical University of Denmark (DTU),https://www.coursera.org/learn/global-environmental-management,global-environmental-management
318
+ Operations Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-operations-analytics,wharton-operations-analytics
319
+ Entrepreneurship Strategy: From Ideation to Exit,HEC Paris,https://www.coursera.org/learn/entrepreneurship-strategy,entrepreneurship-strategy
320
+ FinTech Law and Policy,Duke University,https://www.coursera.org/learn/fintechlawandpolicy,fintechlawandpolicy
321
+ The Social Context of Mental Health and Illness,University of Toronto,https://www.coursera.org/learn/mental-health,mental-health
322
+ What Is Contemporary Art?,The Museum of Modern Art,https://www.coursera.org/learn/contemporary-art,contemporary-art
323
+ The Art of Music Production,Berklee College of Music,https://www.coursera.org/learn/producing-music,producing-music
324
+ Biohacking Your Brain's Health,Emory University,https://www.coursera.org/learn/biohacking-your-brains-health,biohacking-your-brains-health
325
+ Bayesian Statistics: From Concept to Data Analysis,"University of California, Santa Cruz",https://www.coursera.org/learn/bayesian-statistics,bayesian-statistics
326
+ Reporting extra-financier et stratégie RSE,ESSEC Business School,https://www.coursera.org/learn/reporting-extra-financier-strategie-rse,reporting-extra-financier-strategie-rse
327
+ Leading Healthcare Quality and Safety,The George Washington University,https://www.coursera.org/learn/quality-healthcare,quality-healthcare
328
+ Understanding International Relations Theory,National Research University Higher School of Economics,https://www.coursera.org/learn/international-relations-theory,international-relations-theory
329
+ Introduction to Data Analytics,IBM,https://www.coursera.org/learn/introduction-to-data-analytics,introduction-to-data-analytics
330
+ Fundamentos de Excel para Negocios,Universidad Austral,https://www.coursera.org/learn/excel-para-negocios,excel-para-negocios
331
+ Elastic Google Cloud Infrastructure: Scaling and Automation,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-scaling-automation,gcp-infrastructure-scaling-automation
332
+ Cultural Competence - Aboriginal Sydney,The University of Sydney,https://www.coursera.org/learn/cultural-competence-aboriginal-sydney,cultural-competence-aboriginal-sydney
333
+ Fundamentos de Finanzas Empresariales,Universidad de los Andes,https://www.coursera.org/learn/finanzas-empresariales,finanzas-empresariales
334
+ Greening the Economy: Sustainable Cities,Lund University,https://www.coursera.org/learn/gte-sustainable-cities,gte-sustainable-cities
335
+ Introduction to Engineering Mechanics,Georgia Institute of Technology,https://www.coursera.org/learn/engineering-mechanics-statics,engineering-mechanics-statics
336
+ Design-Led Strategy: Design thinking for business strategy and entrepreneurship,The University of Sydney,https://www.coursera.org/learn/design-strategy,design-strategy
337
+ Biology Meets Programming: Bioinformatics for Beginners,University of California San Diego,https://www.coursera.org/learn/bioinformatics,bioinformatics
338
+ Understanding Medical Research: Your Facebook Friend is Wrong,Yale University,https://www.coursera.org/learn/medical-research,medical-research
339
+ Health Behavior Change: From Evidence to Action,Yale University,https://www.coursera.org/learn/health-behavior-change,health-behavior-change
340
+ Ordered Data Structures,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/cs-fundamentals-2,cs-fundamentals-2
341
+ Mindshift: Break Through Obstacles to Learning and Discover Your Hidden Potential,McMaster University,https://www.coursera.org/learn/mindshift,mindshift
342
+ Programming Fundamentals,Duke University,https://www.coursera.org/learn/programming-fundamentals,programming-fundamentals
343
+ Understanding Financial Markets,University of Geneva,https://www.coursera.org/learn/understanding-financial-markets,understanding-financial-markets
344
+ In the Studio: Postwar Abstract Painting,The Museum of Modern Art,https://www.coursera.org/learn/painting,painting
345
+ Drug Commercialization,University of California San Diego,https://www.coursera.org/learn/drug-commercialization,drug-commercialization
346
+ Introduction to Software Product Management,University of Alberta,https://www.coursera.org/learn/introduction-to-software-product-management,introduction-to-software-product-management
347
+ "Social Norms, Social Change I",Unicef,https://www.coursera.org/learn/norms,norms
348
+ Excel Skills for Business: Intermediate II,Macquarie University,https://www.coursera.org/learn/excel-intermediate-2,excel-intermediate-2
349
+ Aboriginal Worldviews and Education,University of Toronto,https://www.coursera.org/learn/aboriginal-education,aboriginal-education
350
+ "Information Systems Auditing, Controls and Assurance",The Hong Kong University of Science and Technology,https://www.coursera.org/learn/information-systems-audit,information-systems-audit
351
+ Six Sigma Principles,University System of Georgia,https://www.coursera.org/learn/six-sigma-principles,six-sigma-principles
352
+ Business Writing,University of Colorado Boulder,https://www.coursera.org/learn/writing-for-business,writing-for-business
353
+ Autodesk Certified Professional: AutoCAD for Design and Drafting Exam Prep,Autodesk,https://www.coursera.org/learn/autodesk-autocad-design-drafting,autodesk-autocad-design-drafting
354
+ Introduction to Typography,California Institute of the Arts,https://www.coursera.org/learn/typography,typography
355
+ Customer Segmentation and Prospecting,Northwestern University,https://www.coursera.org/learn/customer-segmentation-prospecting,customer-segmentation-prospecting
356
+ Claves para Gestionar Personas,IESE Business School,https://www.coursera.org/learn/gestionar-personas,gestionar-personas
357
+ English for Journalism,University of Pennsylvania,https://www.coursera.org/learn/journalism,journalism
358
+ How Things Work: An Introduction to Physics,University of Virginia,https://www.coursera.org/learn/how-things-work,how-things-work
359
+ Business English: Networking,University of Washington,https://www.coursera.org/learn/business-english-intro,business-english-intro
360
+ Summary Statistics in Public Health,Johns Hopkins University,https://www.coursera.org/learn/summary-statistics,summary-statistics
361
+ The Changing Global Order,Universiteit Leiden,https://www.coursera.org/learn/changing-global-order,changing-global-order
362
+ Global Energy and Climate Policy,University of London,https://www.coursera.org/learn/globalenergyandclimatepolicy,globalenergyandclimatepolicy
363
+ El Abogado del Futuro: Legaltech y la Transformación Digital del Derecho,Universidad Austral,https://www.coursera.org/learn/legaltech,legaltech
364
+ Probability and Statistics: To p or not to p?,University of London,https://www.coursera.org/learn/probability-statistics,probability-statistics
365
+ Gut Check: Exploring Your Microbiome,University of Colorado Boulder,https://www.coursera.org/learn/microbiome,microbiome
366
+ Econometrics: Methods and Applications,Erasmus University Rotterdam,https://www.coursera.org/learn/erasmus-econometrics,erasmus-econometrics
367
+ Разработка веб-сервисов на Go - основы языка,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/golang-webservices-1,golang-webservices-1
368
+ Mastering Data Analysis in Excel,Duke University,https://www.coursera.org/learn/analytics-excel,analytics-excel
369
+ Basic Statistics,University of Amsterdam,https://www.coursera.org/learn/basic-statistics,basic-statistics
370
+ "Capstone: Retrieving, Processing, and Visualizing Data with Python",University of Michigan,https://www.coursera.org/learn/python-data-visualization,python-data-visualization
371
+ Design Thinking for the Greater Good: Innovation in the Social Sector,University of Virginia,https://www.coursera.org/learn/uva-darden-design-thinking-social-sector,uva-darden-design-thinking-social-sector
372
+ Introduction to Portfolio Construction and Analysis with Python,EDHEC Business School,https://www.coursera.org/learn/introduction-portfolio-construction-python,introduction-portfolio-construction-python
373
+ Data Analytics for Lean Six Sigma,University of Amsterdam,https://www.coursera.org/learn/data-analytics-for-lean-six-sigma,data-analytics-for-lean-six-sigma
374
+ Refugees in the 21st Century,University of London,https://www.coursera.org/learn/refugees-21st-century,refugees-21st-century
375
+ Building Containerized Applications on AWS,Amazon Web Services,https://www.coursera.org/learn/containerized-apps-on-aws,containerized-apps-on-aws
376
+ Business Transformation with Google Cloud,Google Cloud,https://www.coursera.org/learn/business-transformation-google-cloud,business-transformation-google-cloud
377
+ Version Control with Git,Atlassian,https://www.coursera.org/learn/version-control-with-git,version-control-with-git
378
+ "Transmedia Storytelling: Narrative worlds, emerging technologies, and global audiences",UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/transmedia-storytelling,transmedia-storytelling
379
+ Excel aplicado a los negocios (Nivel Avanzado),Universidad Austral,https://www.coursera.org/learn/excel-aplicado-negocios-avanzado,excel-aplicado-negocios-avanzado
380
+ Introduction to Public Speaking,University of Washington,https://www.coursera.org/learn/public-speaking,public-speaking
381
+ Building Conversational Experiences with Dialogflow,Google Cloud,https://www.coursera.org/learn/conversational-experiences-dialogflow,conversational-experiences-dialogflow
382
+ Guitar for Beginners,Berklee College of Music,https://www.coursera.org/learn/guitar,guitar
383
+ Managing Project Risks and Changes,"University of California, Irvine",https://www.coursera.org/learn/project-risk-management,project-risk-management
384
+ L'excellence opérationnelle en pratique,ESSEC Business School,https://www.coursera.org/learn/excellence-operationnelle,excellence-operationnelle
385
+ Introduction to Cloud Computing,IBM,https://www.coursera.org/learn/introduction-to-cloud,introduction-to-cloud
386
+ Sample-based Learning Methods,Alberta Machine Intelligence Institute,https://www.coursera.org/learn/sample-based-learning-methods,sample-based-learning-methods
387
+ Functional Programming Principles in Scala,École Polytechnique Fédérale de Lausanne,https://www.coursera.org/learn/progfun1,progfun1
388
+ Introduction to Blockchain Technologies,INSEAD,https://www.coursera.org/learn/introduction-blockchain-technologies,introduction-blockchain-technologies
389
+ Introduction to Environmental Law and Policy,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/environmental-law,environmental-law
390
+ "Cameras, Exposure, and Photography",Michigan State University,https://www.coursera.org/learn/exposure-photography,exposure-photography
391
+ Democracia y decisiones públicas. Introducción al análisis de políticas públicas,Universitat Autònoma de Barcelona,https://www.coursera.org/learn/democracia,democracia
392
+ Dentistry 101,University of Michigan,https://www.coursera.org/learn/dentistry101,dentistry101
393
+ "Python Functions, Files, and Dictionaries",University of Michigan,https://www.coursera.org/learn/python-functions-files-dictionaries,python-functions-files-dictionaries
394
+ Anticorrupción: Introducción a conceptos y perspectiva práctica,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/anticorrupcion-introduccion,anticorrupcion-introduccion
395
+ Positive Psychology: Applications and Interventions,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-applications,positive-psychology-applications
396
+ Introduction to Embedded Systems Software and Development Environments,University of Colorado Boulder,https://www.coursera.org/learn/introduction-embedded-systems,introduction-embedded-systems
397
+ Personal & Family Financial Planning,University of Florida,https://www.coursera.org/learn/family-planning,family-planning
398
+ A Law Student's Toolkit,Yale University,https://www.coursera.org/learn/law-student,law-student
399
+ Introducción a Data Science: Programación Estadística con R,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/intro-data-science-programacion-estadistica-r,intro-data-science-programacion-estadistica-r
400
+ "Cybersecurity Roles, Processes & Operating System Security",IBM,https://www.coursera.org/learn/cybersecurity-roles-processes-operating-system-security,cybersecurity-roles-processes-operating-system-security
401
+ Computational Neuroscience,University of Washington,https://www.coursera.org/learn/computational-neuroscience,computational-neuroscience
402
+ De-Mystifying Mindfulness,Universiteit Leiden,https://www.coursera.org/learn/mindfulness,mindfulness
403
+ Smart Cities – Management of Smart Urban Infrastructures,École Polytechnique Fédérale de Lausanne,https://www.coursera.org/learn/smart-cities,smart-cities
404
+ Getting Started with Go,"University of California, Irvine",https://www.coursera.org/learn/golang-getting-started,golang-getting-started
405
+ Introduction to Economic Theories,Erasmus University Rotterdam,https://www.coursera.org/learn/intro-economic-theories,intro-economic-theories
406
+ Probabilistic Graphical Models 1: Representation,Stanford University,https://www.coursera.org/learn/probabilistic-graphical-models,probabilistic-graphical-models
407
+ The Power of Microeconomics: Economic Principles in the Real World,"University of California, Irvine",https://www.coursera.org/learn/principles-of-microeconomics,principles-of-microeconomics
408
+ Introduction to Personal Branding,University of Virginia,https://www.coursera.org/learn/personal-branding,personal-branding
409
+ Love as a Force for Social Justice,Stanford University,https://www.coursera.org/learn/love-social-justice,love-social-justice
410
+ Mathematical Thinking in Computer Science,University of California San Diego,https://www.coursera.org/learn/what-is-a-proof,what-is-a-proof
411
+ Introduction to Genetics and Evolution,Duke University,https://www.coursera.org/learn/genetics-evolution,genetics-evolution
412
+ Основы программирования на Python,National Research University Higher School of Economics,https://www.coursera.org/learn/python-osnovy-programmirovaniya,python-osnovy-programmirovaniya
413
+ Improving Communication Skills,University of Pennsylvania,https://www.coursera.org/learn/wharton-communication-skills,wharton-communication-skills
414
+ "Introduction to Trading, Machine Learning & GCP",New York Institute of Finance,https://www.coursera.org/learn/introduction-trading-machine-learning-gcp,introduction-trading-machine-learning-gcp
415
+ Python Programming: A Concise Introduction,Wesleyan University,https://www.coursera.org/learn/python-programming-introduction,python-programming-introduction
416
+ "The Modern World, Part Two: Global History since 1910",University of Virginia,https://www.coursera.org/learn/modern-world-2,modern-world-2
417
+ Understanding Plants - Part I: What a Plant Knows,Tel Aviv University,https://www.coursera.org/learn/plantknows,plantknows
418
+ Excel Fundamentals for Data Analysis,Macquarie University,https://www.coursera.org/learn/excel-data-analysis-fundamentals,excel-data-analysis-fundamentals
419
+ Finanzas personales,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/finanzas-personales,finanzas-personales
420
+ English Composition I,Duke University,https://www.coursera.org/learn/english-composition,english-composition
421
+ Career 911: Your Future Job in Medicine and Healthcare,Northwestern University,https://www.coursera.org/learn/healthcarejobs,healthcarejobs
422
+ Introduction to Self-Driving Cars,University of Toronto,https://www.coursera.org/learn/intro-self-driving-cars,intro-self-driving-cars
423
+ Corporate & Commercial Law I: Contracts & Employment Law,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/corporate-commercial-law-part1,corporate-commercial-law-part1
424
+ Blockchain Basics,University at Buffalo,https://www.coursera.org/learn/blockchain-basics,blockchain-basics
425
+ Foundations of Business Strategy,University of Virginia,https://www.coursera.org/learn/uva-darden-foundations-business-strategy,uva-darden-foundations-business-strategy
426
+ Introdução à Ciência da Computação com Python Parte 1,Universidade de São Paulo,https://www.coursera.org/learn/ciencia-computacao-python-conceitos,ciencia-computacao-python-conceitos
427
+ Stochastic processes,National Research University Higher School of Economics,https://www.coursera.org/learn/stochasticprocesses,stochasticprocesses
428
+ Foundations for Big Data Analysis with SQL,Cloudera,https://www.coursera.org/learn/foundations-big-data-analysis-sql,foundations-big-data-analysis-sql
429
+ "Innovation Through Design: Think, Make, Break, Repeat",The University of Sydney,https://www.coursera.org/learn/innovation-through-design,innovation-through-design
430
+ Perfect Tenses and Modals,"University of California, Irvine",https://www.coursera.org/learn/perfect-tenses-modals,perfect-tenses-modals
431
+ Getting Started with Azure,LearnQuest,https://www.coursera.org/learn/cloud-azure-intro,cloud-azure-intro
432
+ Moralities of Everyday Life,Yale University,https://www.coursera.org/learn/moralities,moralities
433
+ Revisão Sistemática e Meta-análise,Universidade Estadual de Campinas,https://www.coursera.org/learn/revisao-sistematica,revisao-sistematica
434
+ Understanding child development: from synapse to society,Utrecht University,https://www.coursera.org/learn/child-development,child-development
435
+ Introduction to G Suite,Google Cloud,https://www.coursera.org/learn/introduction-g-suite,introduction-g-suite
436
+ Aprendiendo a aprender: Poderosas herramientas mentales con las que podrás dominar temas difíciles (Learning How to Learn),University of California San Diego,https://www.coursera.org/learn/aprendiendo-a-aprender,aprendiendo-a-aprender
437
+ Building Batch Data Pipelines on GCP,Google Cloud,https://www.coursera.org/learn/batch-data-pipelines-gcp,batch-data-pipelines-gcp
438
+ Financial Planning for Young Adults,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/financial-planning,financial-planning
439
+ Quantitative Methods,University of Amsterdam,https://www.coursera.org/learn/quantitative-methods,quantitative-methods
440
+ "Introduction to Self-Determination Theory: An approach to motivation, development and wellness",University of Rochester,https://www.coursera.org/learn/self-determination-theory,self-determination-theory
441
+ The Technology of Music Production,Berklee College of Music,https://www.coursera.org/learn/technology-of-music-production,technology-of-music-production
442
+ Code Yourself! An Introduction to Programming,The University of Edinburgh,https://www.coursera.org/learn/intro-programming,intro-programming
443
+ Success,University of Pennsylvania,https://www.coursera.org/learn/wharton-success,wharton-success
444
+ Chemicals and Health,Johns Hopkins University,https://www.coursera.org/learn/chemicals-health,chemicals-health
445
+ Improving your statistical inferences,Eindhoven University of Technology,https://www.coursera.org/learn/statistical-inferences,statistical-inferences
446
+ Fundamentals of Finance,University of Pennsylvania,https://www.coursera.org/learn/finance-fundamentals,finance-fundamentals
447
+ How Google does Machine Learning,Google Cloud,https://www.coursera.org/learn/google-machine-learning,google-machine-learning
448
+ Object-Oriented Design,University of Alberta,https://www.coursera.org/learn/object-oriented-design,object-oriented-design
449
+ Introduction to Intellectual Property,University of Pennsylvania,https://www.coursera.org/learn/introduction-intellectual-property,introduction-intellectual-property
450
+ Cost and Economics in Pricing Strategy,BCG,https://www.coursera.org/learn/uva-darden-bcg-pricing-strategy-cost-economics,uva-darden-bcg-pricing-strategy-cost-economics
451
+ Write A Feature Length Screenplay For Film Or Television,Michigan State University,https://www.coursera.org/learn/write-a-feature-length-screenplay-for-film-or-television,write-a-feature-length-screenplay-for-film-or-television
452
+ Marketing Gerencial,Universidad de Chile,https://www.coursera.org/learn/marketing-gerencial,marketing-gerencial
453
+ Corporate Finance Essentials,IESE Business School,https://www.coursera.org/learn/corporate-finance-essentials,corporate-finance-essentials
454
+ Information Security: Context and Introduction,"Royal Holloway, University of London",https://www.coursera.org/learn/information-security-data,information-security-data
455
+ "Anatomy of the Chest, Abdomen, and Pelvis",Yale University,https://www.coursera.org/learn/trunk-anatomy,trunk-anatomy
456
+ Introduction to CSS3,University of Michigan,https://www.coursera.org/learn/introcss,introcss
457
+ Applied Data Science Capstone,IBM,https://www.coursera.org/learn/applied-data-science-capstone,applied-data-science-capstone
458
+ Introduction aux Droits de l’Homme,University of Geneva,https://www.coursera.org/learn/droits-de-lhomme,droits-de-lhomme
459
+ "Programming Languages, Part A",University of Washington,https://www.coursera.org/learn/programming-languages,programming-languages
460
+ Big History: Connecting Knowledge,Macquarie University,https://www.coursera.org/learn/big-history,big-history
461
+ Leadership in 21st Century Organizations,Copenhagen Business School,https://www.coursera.org/learn/leadership-21st-century,leadership-21st-century
462
+ Software Processes and Agile Practices,University of Alberta,https://www.coursera.org/learn/software-processes-and-agile-practices,software-processes-and-agile-practices
463
+ DevOps Culture and Mindset,"University of California, Davis",https://www.coursera.org/learn/devops-culture-and-mindset,devops-culture-and-mindset
464
+ Introduction to Statistics & Data Analysis in Public Health,Imperial College London,https://www.coursera.org/learn/introduction-statistics-data-analysis-public-health,introduction-statistics-data-analysis-public-health
465
+ Discrete Optimization,The University of Melbourne,https://www.coursera.org/learn/discrete-optimization,discrete-optimization
466
+ Основы разработки на C++: белый пояс,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/c-plus-plus-white,c-plus-plus-white
467
+ COVID-19 Contact Tracing For Nursing Professionals,University of Houston,https://www.coursera.org/learn/covid-19-contact-tracing-for-nursing-professionals,covid-19-contact-tracing-for-nursing-professionals
468
+ High Stakes Leadership: Leading in Times of Crisis,University of Michigan,https://www.coursera.org/learn/high-stakes-leadership,high-stakes-leadership
469
+ Essential Epidemiologic Tools for Public Health Practice,Johns Hopkins University,https://www.coursera.org/learn/epidemiology-tools,epidemiology-tools
470
+ Epigenetic Control of Gene Expression,The University of Melbourne,https://www.coursera.org/learn/epigenetics,epigenetics
471
+ "Recruiting, Hiring, and Onboarding Employees",University of Minnesota,https://www.coursera.org/learn/recruiting-hiring-onboarding-employees,recruiting-hiring-onboarding-employees
472
+ AWS Fundamentals: Addressing Security Risk,Amazon Web Services,https://www.coursera.org/learn/aws-fundamentals-addressing-security-risk,aws-fundamentals-addressing-security-risk
473
+ Ancient Philosophy: Plato & His Predecessors,University of Pennsylvania,https://www.coursera.org/learn/plato,plato
474
+ Becoming a changemaker: Introduction to Social Innovation,University of Cape Town,https://www.coursera.org/learn/social-innovation,social-innovation
475
+ Spanish for Successful Communication in Healthcare Settings,Rice University,https://www.coursera.org/learn/spanish-in-healthcare-settings,spanish-in-healthcare-settings
476
+ Leading transformations: Manage change,Macquarie University,https://www.coursera.org/learn/change-management,change-management
477
+ Introduction to Systems Engineering,UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/systems-engineering,systems-engineering
478
+ Digital Marketing Analytics in Theory,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-analytics,marketing-analytics
479
+ Data Visualization and Communication with Tableau,Duke University,https://www.coursera.org/learn/analytics-tableau,analytics-tableau
480
+ Bayesian Statistics: Techniques and Models,"University of California, Santa Cruz",https://www.coursera.org/learn/mcmc-bayesian-statistics,mcmc-bayesian-statistics
481
+ Human Rights for Open Societies,Utrecht University,https://www.coursera.org/learn/humanrights,humanrights
482
+ Introduction to Computers and Office Productivity Software,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/introduction-to-computers-and-office-productivity-software,introduction-to-computers-and-office-productivity-software
483
+ The Introduction to Quantum Computing,Saint Petersburg State University,https://www.coursera.org/learn/quantum-computing-algorithms,quantum-computing-algorithms
484
+ Intercultural Management,ESCP Business School,https://www.coursera.org/learn/intercultural,intercultural
485
+ Get Interactive: Practical Teaching with Technology,University of London,https://www.coursera.org/learn/getinmooc,getinmooc
486
+ International Law In Action: Investigating and Prosecuting International Crimes,Universiteit Leiden,https://www.coursera.org/learn/international-law-in-action-2,international-law-in-action-2
487
+ Global Financial Markets and Instruments,Rice University,https://www.coursera.org/learn/global-financial-markets-instruments,global-financial-markets-instruments
488
+ Write Your First Novel,Michigan State University,https://www.coursera.org/learn/write-your-first-novel,write-your-first-novel
489
+ Développement psychologique de l'enfant,University of Geneva,https://www.coursera.org/learn/enfant-developpement,enfant-developpement
490
+ Songwriting: Writing the Lyrics,Berklee College of Music,https://www.coursera.org/learn/songwriting-lyrics,songwriting-lyrics
491
+ "Applied Plotting, Charting & Data Representation in Python",University of Michigan,https://www.coursera.org/learn/python-plotting,python-plotting
492
+ Systems Thinking In Public Health,Johns Hopkins University,https://www.coursera.org/learn/systems-thinking,systems-thinking
493
+ Excel Skills for Business: Advanced,Macquarie University,https://www.coursera.org/learn/excel-advanced,excel-advanced
494
+ Introduction to Neuroeconomics: How the Brain Makes Decisions,National Research University Higher School of Economics,https://www.coursera.org/learn/neuroeconomics,neuroeconomics
495
+ Community Organizing for Social Justice,University of Michigan,https://www.coursera.org/learn/community-organizing,community-organizing
496
+ Build a Modern Computer from First Principles: Nand to Tetris Part II (project-centered course),Hebrew University of Jerusalem,https://www.coursera.org/learn/nand2tetris2,nand2tetris2
497
+ The Global Financial Crisis,Yale University,https://www.coursera.org/learn/global-financial-crisis,global-financial-crisis
498
+ Agile и Scrum в работе над проектами и продуктами,E-Learning Development Fund,https://www.coursera.org/learn/upravleniya-proektami-agile-scrum,upravleniya-proektami-agile-scrum
499
+ Renewable Energy and Green Building Entrepreneurship,Duke University,https://www.coursera.org/learn/renewable-energy-entrepreneurship,renewable-energy-entrepreneurship
500
+ The Cycle: Management of Successful Arts and Cultural Organizations,"University of Maryland, College Park",https://www.coursera.org/learn/the-cycle,the-cycle
501
+ Privacy Law and Data Protection,University of Pennsylvania,https://www.coursera.org/learn/privacy-law-data-protection,privacy-law-data-protection
502
+ Building Modern Python Applications on AWS,Amazon Web Services,https://www.coursera.org/learn/building-modern-python-applications-on-aws,building-modern-python-applications-on-aws
503
+ Digital Business Models,Lund University,https://www.coursera.org/learn/digital-business-models,digital-business-models
504
+ "Everyday Excel, Part 2",University of Colorado Boulder,https://www.coursera.org/learn/everyday-excel-part-2,everyday-excel-part-2
505
+ Reliable Google Cloud Infrastructure: Design and Process,Google Cloud,https://www.coursera.org/learn/cloud-infrastructure-design-process,cloud-infrastructure-design-process
506
+ Introduction to Computer Programming,University of London,https://www.coursera.org/learn/introduction-to-computer-programming,introduction-to-computer-programming
507
+ "Big Data Essentials: HDFS, MapReduce and Spark RDD",Yandex,https://www.coursera.org/learn/big-data-essentials,big-data-essentials
508
+ Dermatology: Trip to skin,Novosibirsk State University ,https://www.coursera.org/learn/dermatology,dermatology
509
+ Sustainable Tourism – promoting environmental public health,University of Copenhagen,https://www.coursera.org/learn/sustainable-tourism,sustainable-tourism
510
+ Population Health During A Pandemic: Contact Tracing and Beyond,University of Houston,https://www.coursera.org/learn/contact-tracing-for-covid-19,contact-tracing-for-covid-19
511
+ Social Impact Strategy: Tools for Entrepreneurs and Innovators,University of Pennsylvania,https://www.coursera.org/learn/social-impact,social-impact
512
+ C for Everyone: Programming Fundamentals,"University of California, Santa Cruz",https://www.coursera.org/learn/c-for-everyone,c-for-everyone
513
+ Introduction to Structured Query Language (SQL),University of Michigan,https://www.coursera.org/learn/intro-sql,intro-sql
514
+ Social and Economic Networks: Models and Analysis,Stanford University,https://www.coursera.org/learn/social-economic-networks,social-economic-networks
515
+ The Truth About Cats and Dogs,The University of Edinburgh,https://www.coursera.org/learn/cats-and-dogs,cats-and-dogs
516
+ Sports and Society,Duke University,https://www.coursera.org/learn/sports-society,sports-society
517
+ Fundamentals of Scalable Data Science,IBM,https://www.coursera.org/learn/ds,ds
518
+ Effective Compliance Programs,University of Pennsylvania,https://www.coursera.org/learn/effective-compliance-programs,effective-compliance-programs
519
+ Transformation of the Global Food System,University of Copenhagen,https://www.coursera.org/learn/transformation-global-food-system,transformation-global-food-system
520
+ Web Application Technologies and Django,University of Michigan,https://www.coursera.org/learn/django-database-web-apps,django-database-web-apps
521
+ Curanderismo: Traditional Healing Using Plants,University of New Mexico,https://www.coursera.org/learn/curanderismo-plants,curanderismo-plants
522
+ Applied Machine Learning in Python,University of Michigan,https://www.coursera.org/learn/python-machine-learning,python-machine-learning
523
+ Troubleshooting and Debugging Techniques,Google,https://www.coursera.org/learn/troubleshooting-debugging-techniques,troubleshooting-debugging-techniques
524
+ Introduction to C# Programming and Unity,University of Colorado System,https://www.coursera.org/learn/introduction-programming-unity,introduction-programming-unity
525
+ "Corrección, estilo y variaciones de la lengua española",Universitat Autònoma de Barcelona,https://www.coursera.org/learn/correccion-estilo-variaciones,correccion-estilo-variaciones
526
+ Les Fondamentaux de la Négociation,ESSEC Business School,https://www.coursera.org/learn/fondamentaux-negociation,fondamentaux-negociation
527
+ Introduction to Clinical Data,Stanford University,https://www.coursera.org/learn/introduction-clinical-data,introduction-clinical-data
528
+ The Science of Success: What Researchers Know that You Should Know,University of Michigan,https://www.coursera.org/learn/success,success
529
+ Finance for Non-Financial Managers,Emory University,https://www.coursera.org/learn/finance-for-non-financial-managers,finance-for-non-financial-managers
530
+ Getting Started With Music Theory,Michigan State University,https://www.coursera.org/learn/music-theory,music-theory
531
+ Digital Marketing Analytics in Practice,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/digital-analytics,digital-analytics
532
+ The Horse Course: Introduction to Basic Care and Management,University of Florida,https://www.coursera.org/learn/horse-care,horse-care
533
+ Verb Tenses and Passives,"University of California, Irvine",https://www.coursera.org/learn/verb-passives,verb-passives
534
+ Gestión de organizaciones efectivas,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/gestion-organizaciones-efectivas,gestion-organizaciones-efectivas
535
+ Introduction to Big Data,University of California San Diego,https://www.coursera.org/learn/big-data-introduction,big-data-introduction
536
+ Rédaction de contrats,University of Geneva,https://www.coursera.org/learn/contrats,contrats
537
+ Philosophy and the Sciences: Introduction to the Philosophy of Cognitive Sciences,The University of Edinburgh,https://www.coursera.org/learn/philosophy-cognitive-sciences,philosophy-cognitive-sciences
538
+ Schizophrenia,Wesleyan University,https://www.coursera.org/learn/schizophrenia,schizophrenia
539
+ Ecology: Ecosystem Dynamics and Conservation,Howard Hughes Medical Institute ,https://www.coursera.org/learn/ecology-conservation,ecology-conservation
540
+ Introduction to Game Development,Michigan State University,https://www.coursera.org/learn/game-development,game-development
541
+ Practical Time Series Analysis,The State University of New York,https://www.coursera.org/learn/practical-time-series-analysis,practical-time-series-analysis
542
+ VLSI CAD Part I: Logic,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/vlsi-cad-logic,vlsi-cad-logic
543
+ Getting Started with Google Kubernetes Engine,Google Cloud,https://www.coursera.org/learn/google-kubernetes-engine,google-kubernetes-engine
544
+ Exploring ​and ​Preparing ​your ​Data with BigQuery,Google Cloud,https://www.coursera.org/learn/gcp-exploring-preparing-data-bigquery,gcp-exploring-preparing-data-bigquery
545
+ Six Sigma and the Organization (Advanced),University System of Georgia,https://www.coursera.org/learn/six-sigma-organization-advanced,six-sigma-organization-advanced
546
+ Social Work Practice: Advocating Social Justice and Change,University of Michigan,https://www.coursera.org/learn/social-work-practice-advocating-social-justice-and-change,social-work-practice-advocating-social-justice-and-change
547
+ Introduction to Machine Learning,Duke University,https://www.coursera.org/learn/machine-learning-duke,machine-learning-duke
548
+ Entrepreneurship 2: Launching your Start-Up,University of Pennsylvania,https://www.coursera.org/learn/wharton-launching-startup,wharton-launching-startup
549
+ Nanotechnology: A Maker’s Course,Duke University,https://www.coursera.org/learn/nanotechnology,nanotechnology
550
+ Creative Problem Solving,University of Minnesota,https://www.coursera.org/learn/creative-problem-solving,creative-problem-solving
551
+ "Sleep: Neurobiology, Medicine, and Society",University of Michigan,https://www.coursera.org/learn/sleep,sleep
552
+ COVID-19: What You Need to Know (CME Eligible),Osmosis,https://www.coursera.org/learn/covid-19-what-you-need-to-know,covid-19-what-you-need-to-know
553
+ Classical Sociological Theory,University of Amsterdam,https://www.coursera.org/learn/classical-sociological-theory,classical-sociological-theory
554
+ Electric Industry Operations and Markets,Duke University,https://www.coursera.org/learn/electricity,electricity
555
+ Preparing for the Google Cloud Professional Cloud Architect Exam,Google Cloud,https://www.coursera.org/learn/preparing-cloud-professional-cloud-architect-exam,preparing-cloud-professional-cloud-architect-exam
556
+ Effective Business Presentations with Powerpoint,PwC,https://www.coursera.org/learn/powerpoint-presentations,powerpoint-presentations
557
+ More Introduction to Financial Accounting,University of Pennsylvania,https://www.coursera.org/learn/wharton-financial-accounting,wharton-financial-accounting
558
+ Cryptocurrency and Blockchain: An Introduction to Digital Currencies,University of Pennsylvania,https://www.coursera.org/learn/wharton-cryptocurrency-blockchain-introduction-digital-currency,wharton-cryptocurrency-blockchain-introduction-digital-currency
559
+ Dairy Production and Management,The Pennsylvania State University,https://www.coursera.org/learn/dairy-production,dairy-production
560
+ Think Again I: How to Understand Arguments,Duke University,https://www.coursera.org/learn/understanding-arguments,understanding-arguments
561
+ Developing Your Musicianship,Berklee College of Music,https://www.coursera.org/learn/develop-your-musicianship,develop-your-musicianship
562
+ Introduction to Operations Management,University of Pennsylvania,https://www.coursera.org/learn/wharton-operations,wharton-operations
563
+ The Oral Cavity: Portal to Health and Disease,University of Pennsylvania,https://www.coursera.org/learn/oralcavity,oralcavity
564
+ Preparing for the Google Cloud Associate Cloud Engineer Exam,Google Cloud,https://www.coursera.org/learn/preparing-cloud-associate-cloud-engineer-exam,preparing-cloud-associate-cloud-engineer-exam
565
+ Fundamental Neuroscience for Neuroimaging,Johns Hopkins University,https://www.coursera.org/learn/neuroscience-neuroimaging,neuroscience-neuroimaging
566
+ European Business Law: Understanding the Fundamentals,Lund University,https://www.coursera.org/learn/european-law-fundamentals,european-law-fundamentals
567
+ Teach English Now! Teaching Language Online,Arizona State University,https://www.coursera.org/learn/teachlanguageonline,teachlanguageonline
568
+ Front-End Web Development with React,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/front-end-react,front-end-react
569
+ Music Business Foundations,Berklee College of Music,https://www.coursera.org/learn/music-business-foundations,music-business-foundations
570
+ Introduction to Business Analytics: Communicating with Data,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/intro-business-analytics,intro-business-analytics
571
+ Security & Safety Challenges in a Globalized World,Universiteit Leiden,https://www.coursera.org/learn/security-safety-globalized-world,security-safety-globalized-world
572
+ Machine Learning Foundations: A Case Study Approach,University of Washington,https://www.coursera.org/learn/ml-foundations,ml-foundations
573
+ Accounting Analytics,University of Pennsylvania,https://www.coursera.org/learn/accounting-analytics,accounting-analytics
574
+ Strategic Business Management - Microeconomics,"University of California, Irvine",https://www.coursera.org/learn/strategic-business-management-microeconomics,strategic-business-management-microeconomics
575
+ "Epidemics, Pandemics and Outbreaks",University of Pittsburgh,https://www.coursera.org/learn/epidemic-pandemic-outbreak,epidemic-pandemic-outbreak
576
+ Roman Architecture,Yale University,https://www.coursera.org/learn/roman-architecture,roman-architecture
577
+ Research Data Management and Sharing,The University of Edinburgh,https://www.coursera.org/learn/data-management,data-management
578
+ Introduction to Genomic Technologies,Johns Hopkins University,https://www.coursera.org/learn/introduction-genomics,introduction-genomics
579
+ Strategic Management,Copenhagen Business School,https://www.coursera.org/learn/strategic-management,strategic-management
580
+ Cybersecurity Compliance Framework & System Administration,IBM,https://www.coursera.org/learn/cybersecurity-compliance-framework-system-administration,cybersecurity-compliance-framework-system-administration
581
+ Legal Tech & Startups,IE Business School,https://www.coursera.org/learn/legal-tech-startups,legal-tech-startups
582
+ Introduction to Chemistry: Reactions and Ratios,Duke University,https://www.coursera.org/learn/intro-chemistry,intro-chemistry
583
+ The Science of Stem Cells,American Museum of Natural History,https://www.coursera.org/learn/stem-cells,stem-cells
584
+ The Business of Product Management I,Advancing Women in Product,https://www.coursera.org/learn/the-business-of-product-management-one,the-business-of-product-management-one
585
+ Positive Psychology: Resilience Skills,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-resilience,positive-psychology-resilience
586
+ AI for Medical Prognosis,DeepLearning.AI,https://www.coursera.org/learn/ai-for-medical-prognosis,ai-for-medical-prognosis
587
+ Antibiotic Stewardship,Stanford University,https://www.coursera.org/learn/antibiotic-stewardship,antibiotic-stewardship
588
+ UX / UI: Fundamentos para o design de interface,Universidade de São Paulo,https://www.coursera.org/learn/ux-ui-design-de-interface,ux-ui-design-de-interface
589
+ EMT Foundations,University of Colorado System,https://www.coursera.org/learn/emt-foundations,emt-foundations
590
+ Industrial IoT on Google Cloud Platform,Google Cloud,https://www.coursera.org/learn/iiot-google-cloud-platform,iiot-google-cloud-platform
591
+ "Penetration Testing, Incident Response and Forensics",IBM,https://www.coursera.org/learn/ibm-penetration-testing-incident-response-forensics,ibm-penetration-testing-incident-response-forensics
592
+ Database Management Essentials,University of Colorado System,https://www.coursera.org/learn/database-management,database-management
593
+ Advertising and Society,Duke University,https://www.coursera.org/learn/role-of-advertising,role-of-advertising
594
+ Everyday Chinese Medicine,The Chinese University of Hong Kong,https://www.coursera.org/learn/everyday-chinese-medicine,everyday-chinese-medicine
595
+ Fundamentals of Machine Learning for Healthcare,Stanford University,https://www.coursera.org/learn/fundamental-machine-learning-healthcare,fundamental-machine-learning-healthcare
596
+ New Approaches to Countering Terror: Countering Violent Extremism,"University of Maryland, College Park",https://www.coursera.org/learn/countering-terror-violent-extremism,countering-terror-violent-extremism
597
+ Magic in the Middle Ages,Universitat de Barcelona,https://www.coursera.org/learn/magic-middle-ages,magic-middle-ages
598
+ Challenging Forensic Science: How Science Should Speak to Court,University of Lausanne,https://www.coursera.org/learn/challenging-forensic-science,challenging-forensic-science
599
+ Exploring Renewable Energy Schemes,University of Pennsylvania,https://www.coursera.org/learn/exploring-renewable-energy,exploring-renewable-energy
600
+ Paleontology: Theropod Dinosaurs and the Origin of Birds,University of Alberta,https://www.coursera.org/learn/theropods-birds,theropods-birds
601
+ Corporate Strategy,UCL School of Management,https://www.coursera.org/learn/corporatestrategy,corporatestrategy
602
+ Getting Started with Essay Writing,"University of California, Irvine",https://www.coursera.org/learn/getting-started-with-essay-writing,getting-started-with-essay-writing
603
+ Actualización en el manejo del paciente con diabetes mellitus tipo 2,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/actualizacion-manejo-diabetes-tipo-2,actualizacion-manejo-diabetes-tipo-2
604
+ Mastering Final Cut Pro,LearnQuest,https://www.coursera.org/learn/mastering-final-cut-pro,mastering-final-cut-pro
605
+ Removing Barriers to Change,University of Pennsylvania,https://www.coursera.org/learn/removing-barriers-to-change,removing-barriers-to-change
606
+ Tricky American English Pronunciation,"University of California, Irvine",https://www.coursera.org/learn/tricky-american-english-pronunciation,tricky-american-english-pronunciation
607
+ Managing an Agile Team,University of Virginia,https://www.coursera.org/learn/uva-darden-agile-team-management,uva-darden-agile-team-management
608
+ International Law in Action: A Guide to the International Courts and Tribunals in The Hague,Universiteit Leiden,https://www.coursera.org/learn/international-law-in-action,international-law-in-action
609
+ "Competencias Laborales: Perfiles, Evaluación y Capacitación.",Universidad de Chile,https://www.coursera.org/learn/competencias-lab,competencias-lab
610
+ Getting started with TensorFlow 2,Imperial College London,https://www.coursera.org/learn/getting-started-with-tensor-flow2,getting-started-with-tensor-flow2
611
+ Geopolitics of Europe,Sciences Po,https://www.coursera.org/learn/geopolitics-europe,geopolitics-europe
612
+ Osteoarchaeology: The Truth in Our Bones,Universiteit Leiden,https://www.coursera.org/learn/truthinourbones-osteoarchaeology-archaeology,truthinourbones-osteoarchaeology-archaeology
613
+ Mathematics for Machine Learning: PCA,Imperial College London,https://www.coursera.org/learn/pca-machine-learning,pca-machine-learning
614
+ Object Oriented Programming in Java,University of California San Diego,https://www.coursera.org/learn/object-oriented-java,object-oriented-java
615
+ Embedded Software and Hardware Architecture,University of Colorado Boulder,https://www.coursera.org/learn/embedded-software-hardware,embedded-software-hardware
616
+ Memoir and Personal Essay: Managing Your Relationship with the Reader,Wesleyan University,https://www.coursera.org/learn/memoir-reader-relationship,memoir-reader-relationship
617
+ Математика и Python для анализа данных,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/mathematics-and-python,mathematics-and-python
618
+ Hacia una práctica constructivista en el aula,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/aulaconstructivista,aulaconstructivista
619
+ Hypothesis-Driven Development,University of Virginia,https://www.coursera.org/learn/uva-darden-agile-testing,uva-darden-agile-testing
620
+ Accounting Data Analytics with Python,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/accounting-data-analytics-python,accounting-data-analytics-python
621
+ Introduction to Molecular Spectroscopy,University of Manchester ,https://www.coursera.org/learn/spectroscopy,spectroscopy
622
+ Managing as a Coach,"University of California, Davis",https://www.coursera.org/learn/managing-as-a-coach,managing-as-a-coach
623
+ The fundamentals of hotel distribution,ESSEC Business School,https://www.coursera.org/learn/hotel-distribution,hotel-distribution
624
+ A Crash Course in Data Science,Johns Hopkins University,https://www.coursera.org/learn/data-science-course,data-science-course
course_feedback_nlp/Untitled.ipynb ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "2c0bc557-3218-4715-900e-491cc5560b6a",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "import numpy as np"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 11,
17
+ "id": "a2d59d9a-5855-4a21-9988-3ea5dd2bb43c",
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "reviews_df = pd.read_csv(\"Coursera_reviews.csv\")\n",
22
+ "courses_df = pd.read_csv(\"Coursera_courses.csv\")"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 12,
28
+ "id": "7af404ff-07f8-489c-b350-263cb33bb277",
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "data": {
33
+ "text/html": [
34
+ "<div>\n",
35
+ "<style scoped>\n",
36
+ " .dataframe tbody tr th:only-of-type {\n",
37
+ " vertical-align: middle;\n",
38
+ " }\n",
39
+ "\n",
40
+ " .dataframe tbody tr th {\n",
41
+ " vertical-align: top;\n",
42
+ " }\n",
43
+ "\n",
44
+ " .dataframe thead th {\n",
45
+ " text-align: right;\n",
46
+ " }\n",
47
+ "</style>\n",
48
+ "<table border=\"1\" class=\"dataframe\">\n",
49
+ " <thead>\n",
50
+ " <tr style=\"text-align: right;\">\n",
51
+ " <th></th>\n",
52
+ " <th>reviews</th>\n",
53
+ " <th>reviewers</th>\n",
54
+ " <th>date_reviews</th>\n",
55
+ " <th>rating</th>\n",
56
+ " <th>course_id</th>\n",
57
+ " </tr>\n",
58
+ " </thead>\n",
59
+ " <tbody>\n",
60
+ " <tr>\n",
61
+ " <th>0</th>\n",
62
+ " <td>Pretty dry, but I was able to pass with just t...</td>\n",
63
+ " <td>By Robert S</td>\n",
64
+ " <td>Feb 12, 2020</td>\n",
65
+ " <td>4</td>\n",
66
+ " <td>google-cbrs-cpi-training</td>\n",
67
+ " </tr>\n",
68
+ " <tr>\n",
69
+ " <th>1</th>\n",
70
+ " <td>would be a better experience if the video and ...</td>\n",
71
+ " <td>By Gabriel E R</td>\n",
72
+ " <td>Sep 28, 2020</td>\n",
73
+ " <td>4</td>\n",
74
+ " <td>google-cbrs-cpi-training</td>\n",
75
+ " </tr>\n",
76
+ " <tr>\n",
77
+ " <th>2</th>\n",
78
+ " <td>Information was perfect! The program itself wa...</td>\n",
79
+ " <td>By Jacob D</td>\n",
80
+ " <td>Apr 08, 2020</td>\n",
81
+ " <td>4</td>\n",
82
+ " <td>google-cbrs-cpi-training</td>\n",
83
+ " </tr>\n",
84
+ " <tr>\n",
85
+ " <th>3</th>\n",
86
+ " <td>A few grammatical mistakes on test made me do ...</td>\n",
87
+ " <td>By Dale B</td>\n",
88
+ " <td>Feb 24, 2020</td>\n",
89
+ " <td>4</td>\n",
90
+ " <td>google-cbrs-cpi-training</td>\n",
91
+ " </tr>\n",
92
+ " <tr>\n",
93
+ " <th>4</th>\n",
94
+ " <td>Excellent course and the training provided was...</td>\n",
95
+ " <td>By Sean G</td>\n",
96
+ " <td>Jun 18, 2020</td>\n",
97
+ " <td>4</td>\n",
98
+ " <td>google-cbrs-cpi-training</td>\n",
99
+ " </tr>\n",
100
+ " </tbody>\n",
101
+ "</table>\n",
102
+ "</div>"
103
+ ],
104
+ "text/plain": [
105
+ " reviews reviewers \\\n",
106
+ "0 Pretty dry, but I was able to pass with just t... By Robert S \n",
107
+ "1 would be a better experience if the video and ... By Gabriel E R \n",
108
+ "2 Information was perfect! The program itself wa... By Jacob D \n",
109
+ "3 A few grammatical mistakes on test made me do ... By Dale B \n",
110
+ "4 Excellent course and the training provided was... By Sean G \n",
111
+ "\n",
112
+ " date_reviews rating course_id \n",
113
+ "0 Feb 12, 2020 4 google-cbrs-cpi-training \n",
114
+ "1 Sep 28, 2020 4 google-cbrs-cpi-training \n",
115
+ "2 Apr 08, 2020 4 google-cbrs-cpi-training \n",
116
+ "3 Feb 24, 2020 4 google-cbrs-cpi-training \n",
117
+ "4 Jun 18, 2020 4 google-cbrs-cpi-training "
118
+ ]
119
+ },
120
+ "execution_count": 12,
121
+ "metadata": {},
122
+ "output_type": "execute_result"
123
+ }
124
+ ],
125
+ "source": [
126
+ "reviews_df.head()"
127
+ ]
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "execution_count": 13,
132
+ "id": "8e1bef72-cba2-4431-b111-03bc0c872ee0",
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "data": {
137
+ "text/html": [
138
+ "<div>\n",
139
+ "<style scoped>\n",
140
+ " .dataframe tbody tr th:only-of-type {\n",
141
+ " vertical-align: middle;\n",
142
+ " }\n",
143
+ "\n",
144
+ " .dataframe tbody tr th {\n",
145
+ " vertical-align: top;\n",
146
+ " }\n",
147
+ "\n",
148
+ " .dataframe thead th {\n",
149
+ " text-align: right;\n",
150
+ " }\n",
151
+ "</style>\n",
152
+ "<table border=\"1\" class=\"dataframe\">\n",
153
+ " <thead>\n",
154
+ " <tr style=\"text-align: right;\">\n",
155
+ " <th></th>\n",
156
+ " <th>name</th>\n",
157
+ " <th>institution</th>\n",
158
+ " <th>course_url</th>\n",
159
+ " <th>course_id</th>\n",
160
+ " </tr>\n",
161
+ " </thead>\n",
162
+ " <tbody>\n",
163
+ " <tr>\n",
164
+ " <th>0</th>\n",
165
+ " <td>Machine Learning</td>\n",
166
+ " <td>Stanford University</td>\n",
167
+ " <td>https://www.coursera.org/learn/machine-learning</td>\n",
168
+ " <td>machine-learning</td>\n",
169
+ " </tr>\n",
170
+ " <tr>\n",
171
+ " <th>1</th>\n",
172
+ " <td>Indigenous Canada</td>\n",
173
+ " <td>University of Alberta</td>\n",
174
+ " <td>https://www.coursera.org/learn/indigenous-canada</td>\n",
175
+ " <td>indigenous-canada</td>\n",
176
+ " </tr>\n",
177
+ " <tr>\n",
178
+ " <th>2</th>\n",
179
+ " <td>The Science of Well-Being</td>\n",
180
+ " <td>Yale University</td>\n",
181
+ " <td>https://www.coursera.org/learn/the-science-of-...</td>\n",
182
+ " <td>the-science-of-well-being</td>\n",
183
+ " </tr>\n",
184
+ " <tr>\n",
185
+ " <th>3</th>\n",
186
+ " <td>Technical Support Fundamentals</td>\n",
187
+ " <td>Google</td>\n",
188
+ " <td>https://www.coursera.org/learn/technical-suppo...</td>\n",
189
+ " <td>technical-support-fundamentals</td>\n",
190
+ " </tr>\n",
191
+ " <tr>\n",
192
+ " <th>4</th>\n",
193
+ " <td>Become a CBRS Certified Professional Installer...</td>\n",
194
+ " <td>Google - Spectrum Sharing</td>\n",
195
+ " <td>https://www.coursera.org/learn/google-cbrs-cpi...</td>\n",
196
+ " <td>google-cbrs-cpi-training</td>\n",
197
+ " </tr>\n",
198
+ " </tbody>\n",
199
+ "</table>\n",
200
+ "</div>"
201
+ ],
202
+ "text/plain": [
203
+ " name \\\n",
204
+ "0 Machine Learning \n",
205
+ "1 Indigenous Canada \n",
206
+ "2 The Science of Well-Being \n",
207
+ "3 Technical Support Fundamentals \n",
208
+ "4 Become a CBRS Certified Professional Installer... \n",
209
+ "\n",
210
+ " institution \\\n",
211
+ "0 Stanford University \n",
212
+ "1 University of Alberta \n",
213
+ "2 Yale University \n",
214
+ "3 Google \n",
215
+ "4 Google - Spectrum Sharing \n",
216
+ "\n",
217
+ " course_url \\\n",
218
+ "0 https://www.coursera.org/learn/machine-learning \n",
219
+ "1 https://www.coursera.org/learn/indigenous-canada \n",
220
+ "2 https://www.coursera.org/learn/the-science-of-... \n",
221
+ "3 https://www.coursera.org/learn/technical-suppo... \n",
222
+ "4 https://www.coursera.org/learn/google-cbrs-cpi... \n",
223
+ "\n",
224
+ " course_id \n",
225
+ "0 machine-learning \n",
226
+ "1 indigenous-canada \n",
227
+ "2 the-science-of-well-being \n",
228
+ "3 technical-support-fundamentals \n",
229
+ "4 google-cbrs-cpi-training "
230
+ ]
231
+ },
232
+ "execution_count": 13,
233
+ "metadata": {},
234
+ "output_type": "execute_result"
235
+ }
236
+ ],
237
+ "source": [
238
+ "courses_df.head()"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": 1,
244
+ "id": "0ba0e446-f8ac-4949-868f-2d70e282f25e",
245
+ "metadata": {},
246
+ "outputs": [
247
+ {
248
+ "data": {
249
+ "application/vnd.jupyter.widget-view+json": {
250
+ "model_id": "0984692eeaa447a6a9dd70435c72e55d",
251
+ "version_major": 2,
252
+ "version_minor": 0
253
+ },
254
+ "text/plain": [
255
+ "config.json: 0%| | 0.00/483 [00:00<?, ?B/s]"
256
+ ]
257
+ },
258
+ "metadata": {},
259
+ "output_type": "display_data"
260
+ },
261
+ {
262
+ "data": {
263
+ "application/vnd.jupyter.widget-view+json": {
264
+ "model_id": "5bf297ecaf37442eb4b01f6a8ac2b69f",
265
+ "version_major": 2,
266
+ "version_minor": 0
267
+ },
268
+ "text/plain": [
269
+ "tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]"
270
+ ]
271
+ },
272
+ "metadata": {},
273
+ "output_type": "display_data"
274
+ },
275
+ {
276
+ "data": {
277
+ "application/vnd.jupyter.widget-view+json": {
278
+ "model_id": "6445393b608c4822bd90bdf2f1692a0e",
279
+ "version_major": 2,
280
+ "version_minor": 0
281
+ },
282
+ "text/plain": [
283
+ "vocab.txt: 0.00B [00:00, ?B/s]"
284
+ ]
285
+ },
286
+ "metadata": {},
287
+ "output_type": "display_data"
288
+ },
289
+ {
290
+ "data": {
291
+ "application/vnd.jupyter.widget-view+json": {
292
+ "model_id": "09f22b8c6976416d96c5af9bef5e25e4",
293
+ "version_major": 2,
294
+ "version_minor": 0
295
+ },
296
+ "text/plain": [
297
+ "tokenizer.json: 0.00B [00:00, ?B/s]"
298
+ ]
299
+ },
300
+ "metadata": {},
301
+ "output_type": "display_data"
302
+ },
303
+ {
304
+ "data": {
305
+ "application/vnd.jupyter.widget-view+json": {
306
+ "model_id": "6049ddbbb2e94125b20c61d3d9ab5cb4",
307
+ "version_major": 2,
308
+ "version_minor": 0
309
+ },
310
+ "text/plain": [
311
+ "model.safetensors: 0%| | 0.00/268M [00:00<?, ?B/s]"
312
+ ]
313
+ },
314
+ "metadata": {},
315
+ "output_type": "display_data"
316
+ },
317
+ {
318
+ "data": {
319
+ "application/vnd.jupyter.widget-view+json": {
320
+ "model_id": "3a7f44f7809e41fd86309b76ee57f0dc",
321
+ "version_major": 2,
322
+ "version_minor": 0
323
+ },
324
+ "text/plain": [
325
+ "pytorch_model.bin: 0%| | 0.00/268M [00:00<?, ?B/s]"
326
+ ]
327
+ },
328
+ "metadata": {},
329
+ "output_type": "display_data"
330
+ },
331
+ {
332
+ "ename": "OSError",
333
+ "evalue": "distilbert/distilbert-base-uncased does not appear to have a file named pytorch_model.bin or model.safetensors.",
334
+ "output_type": "error",
335
+ "traceback": [
336
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
337
+ "\u001b[31mOSError\u001b[39m Traceback (most recent call last)",
338
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtransformers\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForMaskedLM\n\u001b[32m 4\u001b[39m tokenizer = AutoTokenizer.from_pretrained(\u001b[33m\"\u001b[39m\u001b[33mdistilbert/distilbert-base-uncased\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m model = \u001b[43mAutoModelForMaskedLM\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mdistilbert/distilbert-base-uncased\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
339
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:372\u001b[39m, in \u001b[36m_BaseAutoModelClass.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[39m\n\u001b[32m 370\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m model_class.config_class == config.sub_configs.get(\u001b[33m\"\u001b[39m\u001b[33mtext_config\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m 371\u001b[39m config = config.get_text_config()\n\u001b[32m--> \u001b[39m\u001b[32m372\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 373\u001b[39m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 374\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 375\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m 376\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig.\u001b[34m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m.\u001b[34m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 377\u001b[39m 
\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m.join(c.\u001b[34m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m._model_mapping)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 378\u001b[39m )\n",
340
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\modeling_utils.py:4038\u001b[39m, in \u001b[36mPreTrainedModel.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[39m\n\u001b[32m 4033\u001b[39m logger.warning_once(\n\u001b[32m 4034\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mA kernel_config was provided but use_kernels is False; setting use_kernels=True automatically. To suppress this warning, explicitly set use_kernels to True.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 4035\u001b[39m )\n\u001b[32m 4036\u001b[39m use_kernels = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m4038\u001b[39m checkpoint_files, sharded_metadata = \u001b[43m_get_resolved_checkpoint_files\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 4039\u001b[39m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4040\u001b[39m \u001b[43m \u001b[49m\u001b[43mvariant\u001b[49m\u001b[43m=\u001b[49m\u001b[43mvariant\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4041\u001b[39m \u001b[43m \u001b[49m\u001b[43mgguf_file\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgguf_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4042\u001b[39m \u001b[43m \u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4043\u001b[39m \u001b[43m \u001b[49m\u001b[43mdownload_kwargs\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdownload_kwargs_with_commit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4044\u001b[39m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[43m=\u001b[49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4045\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mis_remote_code\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_auto_class\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 4046\u001b[39m \u001b[43m \u001b[49m\u001b[43mtransformers_explicit_filename\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtransformers_weights\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 4047\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4049\u001b[39m is_quantized = hf_quantizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 4051\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m gguf_file:\n",
341
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\modeling_utils.py:710\u001b[39m, in \u001b[36m_get_resolved_checkpoint_files\u001b[39m\u001b[34m(pretrained_model_name_or_path, variant, gguf_file, use_safetensors, user_agent, is_remote_code, transformers_explicit_filename, download_kwargs)\u001b[39m\n\u001b[32m 704\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\n\u001b[32m 705\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not appear to have a file named\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 706\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m but there is a file without the variant\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 707\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvariant\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. 
Use `variant=None` to load this model from those weights.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 708\u001b[39m )\n\u001b[32m 709\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m710\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\n\u001b[32m 711\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not appear to have a file named\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 712\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m or \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(SAFE_WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 713\u001b[39m )\n\u001b[32m 715\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m:\n\u001b[32m 716\u001b[39m \u001b[38;5;66;03m# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted\u001b[39;00m\n\u001b[32m 717\u001b[39m \u001b[38;5;66;03m# to the original exception.\u001b[39;00m\n\u001b[32m 718\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m\n",
342
+ "\u001b[31mOSError\u001b[39m: distilbert/distilbert-base-uncased does not appear to have a file named pytorch_model.bin or model.safetensors."
343
+ ]
344
+ }
345
+ ],
346
+ "source": [
347
+ "# Load model directly\n",
348
+ "from transformers import AutoTokenizer, AutoModelForMaskedLM\n",
349
+ "\n",
350
+ "tokenizer = AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")\n",
351
+ "model = AutoModelForMaskedLM.from_pretrained(\"distilbert/distilbert-base-uncased\")"
352
+ ]
353
+ },
354
+ {
355
+ "cell_type": "code",
356
+ "execution_count": 2,
357
+ "id": "266c5278-0eb4-4daf-b18f-98d9d426ed70",
358
+ "metadata": {},
359
+ "outputs": [],
360
+ "source": [
361
+ "import transformers"
362
+ ]
363
+ },
364
+ {
365
+ "cell_type": "code",
366
+ "execution_count": 3,
367
+ "id": "f687230b-a74c-4b63-92f9-f6bc378feecc",
368
+ "metadata": {},
369
+ "outputs": [
370
+ {
371
+ "data": {
372
+ "text/plain": [
373
+ "\u001b[31mType:\u001b[39m _LazyModule\n",
374
+ "\u001b[31mString form:\u001b[39m <module 'transformers' from 'C:\\\\Users\\\\PC\\\\Documents\\\\students\\\\course_feedback_nlp\\\\pytorch\\\\Lib\\\\site-packages\\\\transformers\\\\__init__.py'>\n",
375
+ "\u001b[31mFile:\u001b[39m c:\\users\\pc\\documents\\students\\course_feedback_nlp\\pytorch\\lib\\site-packages\\transformers\\__init__.py\n",
376
+ "\u001b[31mDocstring:\u001b[39m <no docstring>\n",
377
+ "\u001b[31mClass docstring:\u001b[39m Module class that surfaces all objects but only performs associated imports when the objects are requested."
378
+ ]
379
+ },
380
+ "metadata": {},
381
+ "output_type": "display_data"
382
+ }
383
+ ],
384
+ "source": [
385
+ "transformers?"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "code",
390
+ "execution_count": null,
391
+ "id": "bf2d9082-1d36-4fde-ad24-8e7599c65acc",
392
+ "metadata": {},
393
+ "outputs": [],
394
+ "source": []
395
+ }
396
+ ],
397
+ "metadata": {
398
+ "kernelspec": {
399
+ "display_name": "pytorch",
400
+ "language": "python",
401
+ "name": "pytorch"
402
+ },
403
+ "language_info": {
404
+ "codemirror_mode": {
405
+ "name": "ipython",
406
+ "version": 3
407
+ },
408
+ "file_extension": ".py",
409
+ "mimetype": "text/x-python",
410
+ "name": "python",
411
+ "nbconvert_exporter": "python",
412
+ "pygments_lexer": "ipython3",
413
+ "version": "3.12.7"
414
+ }
415
+ },
416
+ "nbformat": 4,
417
+ "nbformat_minor": 5
418
+ }
course_feedback_nlp/evaluate.py ADDED
@@ -0,0 +1,548 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Student Feedback Sentiment Model - Evaluation Script
3
+ ====================================================
4
+ Run this after training to complete:
5
+ - Test evaluation
6
+ - Generate plots
7
+ - Save results
8
+ """
9
+
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from torch.utils.data import DataLoader, TensorDataset
13
+ import pandas as pd
14
+ import numpy as np
15
+ from sklearn.model_selection import train_test_split
16
+ from sklearn.metrics import classification_report, confusion_matrix
17
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
18
+ from tqdm.auto import tqdm
19
+ import matplotlib.pyplot as plt
20
+ import seaborn as sns
21
+ import os
22
+ import json
23
+ import gc
24
+ import warnings
25
+ warnings.filterwarnings('ignore')
26
+
27
+ # ============================================================
28
+ # CONFIGURATION (must match training!)
29
+ # ============================================================
30
+
31
# Evaluation settings. The label mapping, sequence length, split fraction and
# seed are used below to rebuild the test split and tokenize it, so they have
# to agree with the values the model was trained with.
CONFIG = {
    # --- Paths ---------------------------------------------------------
    "data_path": "Coursera_reviews.csv",
    "base_model": "./distilbert-base-uncased",
    "output_dir": "teacher_sentiment_model",

    # --- Label space ---------------------------------------------------
    "num_classes": 3,
    "class_names": ["Negative", "Neutral", "Positive"],
    # Keys are zero-based star indices (rating - 1); values are class IDs
    # that index into ``class_names``.
    "class_mapping": {
        0: 0,  # 1-star → Negative
        1: 0,  # 2-star → Negative
        2: 1,  # 3-star → Neutral
        3: 2,  # 4-star → Positive
        4: 2,  # 5-star → Positive
    },

    # --- Tokenization / batching ---------------------------------------
    "max_length": 96,
    "batch_size": 128,

    # --- Data split ----------------------------------------------------
    "test_size": 0.1,
    "seed": 42,

    # --- Runtime -------------------------------------------------------
    "num_workers": 4,
    "use_amp": True,
}
51
+
52
+
53
def set_seed(seed):
    """Seed every random number generator this script can draw from.

    Seeds Python's built-in ``random`` module, NumPy's legacy global
    generator, and PyTorch's CPU and CUDA generators so that repeated runs
    with the same ``seed`` produce the same random draws.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Imported locally so the function does not rely on a module-level import.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Called unconditionally, as before; PyTorch documents this call as a
    # silent no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
57
+
58
+
59
def tokenize_batch(texts, tokenizer, max_length, desc="Tokenizing", chunk_size=10000):
    """Tokenize ``texts`` chunk by chunk and return stacked tensors.

    Every text is truncated/padded to exactly ``max_length`` tokens, so all
    chunks share the same width and can be concatenated along dimension 0.

    Args:
        texts: Sliceable, sized collection of strings. NumPy arrays and
            pandas Series are converted with ``.tolist()``; plain lists and
            tuples are accepted as well.
        tokenizer: Callable invoked as ``tokenizer(list_of_str,
            truncation=True, padding='max_length', max_length=max_length,
            return_tensors='pt')``. Its result must provide ``'input_ids'``
            and ``'attention_mask'`` tensors.
        max_length: Fixed sequence length passed to the tokenizer.
        desc: Label shown on the progress bar.
        chunk_size: Number of texts handed to the tokenizer per call.

    Returns:
        Tuple ``(input_ids, attention_mask)``, each the concatenation of the
        per-chunk tensors along dimension 0. For empty input both tensors
        have shape ``(0, max_length)``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    total = len(texts)
    if total == 0:
        # torch.cat() rejects an empty list, so build the empty result
        # directly. Integer dtype because these hold token IDs / mask flags.
        return (
            torch.empty((0, max_length), dtype=torch.long),
            torch.empty((0, max_length), dtype=torch.long),
        )

    all_input_ids = []
    all_attention_masks = []

    for start in tqdm(range(0, total, chunk_size), desc=desc):
        chunk = texts[start:start + chunk_size]
        # The tokenizer is given a plain Python list; arrays/Series expose
        # .tolist(), other sequences are copied with list().
        chunk = chunk.tolist() if hasattr(chunk, 'tolist') else list(chunk)
        encodings = tokenizer(
            chunk,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt'
        )
        all_input_ids.append(encodings['input_ids'])
        all_attention_masks.append(encodings['attention_mask'])

    return torch.cat(all_input_ids, dim=0), torch.cat(all_attention_masks, dim=0)
77
+
78
+
79
+ def main():
80
+ set_seed(CONFIG['seed'])
81
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
82
+
83
+ print("=" * 70)
84
+ print("STUDENT FEEDBACK SENTIMENT MODEL - EVALUATION")
85
+ print("=" * 70)
86
+ print(f"Device: {device}")
87
+ if torch.cuda.is_available():
88
+ print(f"GPU: {torch.cuda.get_device_name(0)}")
89
+ print()
90
+
91
+ os.makedirs('plots', exist_ok=True)
92
+
93
+ # ============================================================
94
+ # FIX CONFIG.JSON (the bug from training)
95
+ # ============================================================
96
+
97
+ print("FIXING MODEL CONFIG")
98
+ print("-" * 70)
99
+
100
+ # Load original config from base model
101
+ original_config = AutoConfig.from_pretrained(
102
+ CONFIG['base_model'],
103
+ local_files_only=True
104
+ )
105
+
106
+ # Update for our task
107
+ original_config.num_labels = CONFIG['num_classes']
108
+ original_config.id2label = {i: name for i, name in enumerate(CONFIG['class_names'])}
109
+ original_config.label2id = {name: i for i, name in enumerate(CONFIG['class_names'])}
110
+
111
+ # Save corrected config
112
+ original_config.save_pretrained(CONFIG['output_dir'])
113
+ print(f" ✓ Fixed config.json in {CONFIG['output_dir']}/")
114
+
115
+ # Save our training config separately
116
+ training_config = {
117
+ 'num_classes': CONFIG['num_classes'],
118
+ 'class_names': CONFIG['class_names'],
119
+ 'class_mapping': CONFIG['class_mapping'],
120
+ 'max_length': CONFIG['max_length'],
121
+ }
122
+ with open(os.path.join(CONFIG['output_dir'], 'training_config.json'), 'w') as f:
123
+ json.dump(training_config, f, indent=2)
124
+ print(f" ✓ Saved training_config.json")
125
+ print()
126
+
127
+ # ============================================================
128
+ # LOAD DATA (only need test set)
129
+ # ============================================================
130
+
131
+ print("LOADING DATA")
132
+ print("-" * 70)
133
+
134
+ df = pd.read_csv(CONFIG['data_path'])
135
+ print(f"Raw data: {len(df):,} samples")
136
+
137
+ # Clean
138
+ df = df.dropna(subset=['reviews', 'rating'])
139
+ df = df[df['reviews'].str.strip() != '']
140
+ df['rating'] = df['rating'].astype(int)
141
+ df = df[df['rating'].between(1, 5)]
142
+
143
+ # Map to 3 classes
144
+ df['label_5class'] = df['rating'] - 1
145
+ df['label'] = df['label_5class'].map(CONFIG['class_mapping'])
146
+
147
+ print(f"Cleaned data: {len(df):,} samples")
148
+
149
+ # Get test split (same as training!)
150
+ _, X_test, _, y_test = train_test_split(
151
+ df['reviews'].values, df['label'].values,
152
+ test_size=CONFIG['test_size'],
153
+ random_state=CONFIG['seed'],
154
+ stratify=df['label'].values
155
+ )
156
+
157
+ print(f"Test samples: {len(X_test):,}")
158
+ print()
159
+
160
+ del df
161
+ gc.collect()
162
+
163
+ # ============================================================
164
+ # TOKENIZE TEST DATA
165
+ # ============================================================
166
+
167
+ print("TOKENIZATION")
168
+ print("-" * 70)
169
+
170
+ tokenizer = AutoTokenizer.from_pretrained(CONFIG['output_dir'], local_files_only=True)
171
+
172
+ test_ids, test_masks = tokenize_batch(X_test, tokenizer, CONFIG['max_length'], "Test")
173
+ test_labels = torch.tensor(y_test, dtype=torch.long)
174
+
175
+ test_dataset = TensorDataset(test_ids, test_masks, test_labels)
176
+ test_loader = DataLoader(
177
+ test_dataset,
178
+ batch_size=CONFIG['batch_size'],
179
+ shuffle=False,
180
+ num_workers=CONFIG['num_workers'],
181
+ pin_memory=True
182
+ )
183
+
184
+ print(f"Test batches: {len(test_loader):,}")
185
+ print()
186
+
187
+ del X_test, y_test
188
+ gc.collect()
189
+
190
+ # ============================================================
191
+ # LOAD MODEL
192
+ # ============================================================
193
+
194
+ print("LOADING MODEL")
195
+ print("-" * 70)
196
+
197
+ model = AutoModelForSequenceClassification.from_pretrained(
198
+ CONFIG['output_dir'],
199
+ local_files_only=True
200
+ )
201
+ model = model.to(device)
202
+ model.eval()
203
+
204
+ print(f" ✓ Model loaded from {CONFIG['output_dir']}/")
205
+ print(f" ✓ Num labels: {model.config.num_labels}")
206
+ print()
207
+
208
+ # ============================================================
209
+ # RUN TEST EVALUATION
210
+ # ============================================================
211
+
212
+ print("=" * 70)
213
+ print("FINAL TEST EVALUATION")
214
+ print("=" * 70)
215
+
216
+ all_preds = []
217
+ all_labels = []
218
+ all_probs = []
219
+
220
+ with torch.no_grad():
221
+ for input_ids, attention_mask, labels in tqdm(test_loader, desc="Testing", ncols=100):
222
+ input_ids = input_ids.to(device)
223
+ attention_mask = attention_mask.to(device)
224
+
225
+ if CONFIG['use_amp']:
226
+ with torch.amp.autocast('cuda'):
227
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
228
+ else:
229
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
230
+
231
+ probs = F.softmax(outputs.logits, dim=-1)
232
+ _, preds = outputs.logits.max(1)
233
+
234
+ all_preds.extend(preds.cpu().numpy())
235
+ all_labels.extend(labels.numpy())
236
+ all_probs.extend(probs.cpu().numpy())
237
+
238
+ all_preds = np.array(all_preds)
239
+ all_labels = np.array(all_labels)
240
+ all_probs = np.array(all_probs)
241
+
242
+ test_acc = 100 * (all_preds == all_labels).mean()
243
+
244
+ print()
245
+ print(f"Test Accuracy: {test_acc:.2f}%")
246
+ print()
247
+
248
+ # ============================================================
249
+ # CLASSIFICATION REPORT
250
+ # ============================================================
251
+
252
+ print("CLASSIFICATION REPORT")
253
+ print("-" * 70)
254
+
255
+ report = classification_report(
256
+ all_labels, all_preds,
257
+ target_names=CONFIG['class_names'],
258
+ digits=3,
259
+ output_dict=True
260
+ )
261
+
262
+ print(classification_report(
263
+ all_labels, all_preds,
264
+ target_names=CONFIG['class_names'],
265
+ digits=3
266
+ ))
267
+
268
+ # ============================================================
269
+ # TEACHER-FOCUSED METRICS
270
+ # ============================================================
271
+
272
+ print()
273
+ print("=" * 70)
274
+ print("📊 TEACHER-FOCUSED METRICS")
275
+ print("=" * 70)
276
+ print()
277
+
278
+ # Negative class recall
279
+ negative_recall = report['Negative']['recall'] * 100
280
+ negative_precision = report['Negative']['precision'] * 100
281
+ negative_f1 = report['Negative']['f1-score'] * 100
282
+
283
+ print(f" 🔴 NEGATIVE FEEDBACK DETECTION (Struggling Students):")
284
+ print(f" Recall: {negative_recall:.1f}% ← Catches {negative_recall:.0f}% of struggling students")
285
+ print(f" Precision: {negative_precision:.1f}% ← {negative_precision:.0f}% of flags are real issues")
286
+ print(f" F1-Score: {negative_f1:.1f}%")
287
+ print()
288
+
289
+ # False negative analysis
290
+ false_negatives = ((all_labels == 0) & (all_preds != 0)).sum()
291
+ total_negatives = (all_labels == 0).sum()
292
+ missed_pct = 100 * false_negatives / total_negatives if total_negatives > 0 else 0
293
+
294
+ print(f" ⚠️ MISSED STRUGGLING STUDENTS:")
295
+ print(f" {false_negatives:,} of {total_negatives:,} negative cases missed ({missed_pct:.1f}%)")
296
+ print()
297
+
298
+ # Where did false negatives go?
299
+ fn_mask = (all_labels == 0) & (all_preds != 0)
300
+ if fn_mask.sum() > 0:
301
+ fn_preds = all_preds[fn_mask]
302
+ fn_to_neutral = (fn_preds == 1).sum()
303
+ fn_to_positive = (fn_preds == 2).sum()
304
+ print(f" Misclassified as Neutral: {fn_to_neutral:,}")
305
+ print(f" Misclassified as Positive: {fn_to_positive:,}")
306
+ print()
307
+
308
+ # Confidence analysis
309
+ pred_confidence = all_probs.max(axis=1)
310
+ low_confidence = (pred_confidence < 0.7).sum()
311
+ low_conf_pct = 100 * low_confidence / len(pred_confidence)
312
+
313
+ print(f" 🤔 UNCERTAIN PREDICTIONS (confidence < 70%):")
314
+ print(f" {low_confidence:,} of {len(pred_confidence):,} predictions ({low_conf_pct:.1f}%)")
315
+ print(f" → These should be flagged for manual review")
316
+ print()
317
+
318
+ # Confidence by class
319
+ print(f" 📈 AVERAGE CONFIDENCE BY PREDICTION:")
320
+ for i, name in enumerate(CONFIG['class_names']):
321
+ mask = all_preds == i
322
+ if mask.sum() > 0:
323
+ avg_conf = pred_confidence[mask].mean() * 100
324
+ emoji = ['🔴', '🟡', '🟢'][i]
325
+ print(f" {emoji} {name}: {avg_conf:.1f}%")
326
+ print()
327
+
328
+ # ============================================================
329
+ # CONFUSION MATRIX PLOT
330
+ # ============================================================
331
+
332
+ print("GENERATING PLOTS")
333
+ print("-" * 70)
334
+
335
+ fig, axes = plt.subplots(1, 2, figsize=(14, 5))
336
+
337
+ cm = confusion_matrix(all_labels, all_preds)
338
+
339
+ # Counts
340
+ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
341
+ xticklabels=CONFIG['class_names'],
342
+ yticklabels=CONFIG['class_names'], ax=axes[0],
343
+ annot_kws={'size': 14})
344
+ axes[0].set_xlabel('Predicted', fontsize=12)
345
+ axes[0].set_ylabel('Actual', fontsize=12)
346
+ axes[0].set_title('Confusion Matrix (Counts)', fontsize=14)
347
+
348
+ # Normalized (Recall)
349
+ cm_norm = cm.astype(float) / cm.sum(axis=1, keepdims=True)
350
+ sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
351
+ xticklabels=CONFIG['class_names'],
352
+ yticklabels=CONFIG['class_names'], ax=axes[1],
353
+ annot_kws={'size': 14})
354
+ axes[1].set_xlabel('Predicted', fontsize=12)
355
+ axes[1].set_ylabel('Actual', fontsize=12)
356
+ axes[1].set_title('Confusion Matrix (Recall per Class)', fontsize=14)
357
+
358
+ plt.tight_layout()
359
+ plt.savefig('plots/confusion_matrix_3class.png', dpi=150, bbox_inches='tight')
360
+ print(" ✓ Saved: plots/confusion_matrix_3class.png")
361
+
362
+ # ============================================================
363
+ # PER-CLASS METRICS PLOT
364
+ # ============================================================
365
+
366
+ fig, ax = plt.subplots(figsize=(12, 6))
367
+ x = np.arange(3)
368
+ width = 0.25
369
+
370
+ recalls = [report[c]['recall'] * 100 for c in CONFIG['class_names']]
371
+ precisions = [report[c]['precision'] * 100 for c in CONFIG['class_names']]
372
+ f1s = [report[c]['f1-score'] * 100 for c in CONFIG['class_names']]
373
+
374
+ bars1 = ax.bar(x - width, recalls, width, label='Recall', color='#e74c3c', edgecolor='black')
375
+ bars2 = ax.bar(x, precisions, width, label='Precision', color='#3498db', edgecolor='black')
376
+ bars3 = ax.bar(x + width, f1s, width, label='F1-Score', color='#2ecc71', edgecolor='black')
377
+
378
+ ax.set_ylabel('Score (%)', fontsize=12)
379
+ ax.set_title('Per-Class Metrics (Teacher Sentiment Model)', fontsize=14)
380
+ ax.set_xticks(x)
381
+ ax.set_xticklabels([
382
+ '🔴 Negative\n(Needs Attention)',
383
+ '🟡 Neutral\n(Mixed/Unclear)',
384
+ '🟢 Positive\n(Satisfied)'
385
+ ], fontsize=11)
386
+ ax.legend(fontsize=11)
387
+ ax.set_ylim(0, 105)
388
+ ax.axhline(y=90, color='green', linestyle='--', alpha=0.5, label='90% target')
389
+ ax.grid(True, alpha=0.3, axis='y')
390
+
391
+ # Add value labels
392
+ for bars in [bars1, bars2, bars3]:
393
+ for bar in bars:
394
+ height = bar.get_height()
395
+ ax.annotate(f'{height:.1f}%',
396
+ xy=(bar.get_x() + bar.get_width() / 2, height),
397
+ xytext=(0, 3),
398
+ textcoords="offset points",
399
+ ha='center', va='bottom', fontsize=10, fontweight='bold')
400
+
401
+ plt.tight_layout()
402
+ plt.savefig('plots/per_class_metrics_3class.png', dpi=150, bbox_inches='tight')
403
+ print(" ✓ Saved: plots/per_class_metrics_3class.png")
404
+
405
+ # ============================================================
406
+ # CONFIDENCE DISTRIBUTION PLOT
407
+ # ============================================================
408
+
409
+ fig, axes = plt.subplots(1, 2, figsize=(14, 5))
410
+
411
+ # Overall confidence distribution
412
+ axes[0].hist(pred_confidence, bins=50, color='steelblue', edgecolor='black', alpha=0.7)
413
+ axes[0].axvline(x=0.7, color='red', linestyle='--', linewidth=2, label='70% threshold')
414
+ axes[0].set_xlabel('Confidence', fontsize=12)
415
+ axes[0].set_ylabel('Count', fontsize=12)
416
+ axes[0].set_title('Prediction Confidence Distribution', fontsize=14)
417
+ axes[0].legend()
418
+ axes[0].grid(True, alpha=0.3)
419
+
420
+ # Confidence by class
421
+ colors = ['#e74c3c', '#f39c12', '#27ae60']
422
+ for i, (name, color) in enumerate(zip(CONFIG['class_names'], colors)):
423
+ mask = all_preds == i
424
+ if mask.sum() > 0:
425
+ axes[1].hist(pred_confidence[mask], bins=30, alpha=0.5, label=name, color=color)
426
+
427
+ axes[1].axvline(x=0.7, color='red', linestyle='--', linewidth=2, label='70% threshold')
428
+ axes[1].set_xlabel('Confidence', fontsize=12)
429
+ axes[1].set_ylabel('Count', fontsize=12)
430
+ axes[1].set_title('Confidence by Predicted Class', fontsize=14)
431
+ axes[1].legend()
432
+ axes[1].grid(True, alpha=0.3)
433
+
434
+ plt.tight_layout()
435
+ plt.savefig('plots/confidence_distribution.png', dpi=150, bbox_inches='tight')
436
+ print(" ✓ Saved: plots/confidence_distribution.png")
437
+
438
+ # ============================================================
439
+ # ERROR ANALYSIS PLOT
440
+ # ============================================================
441
+
442
+ fig, ax = plt.subplots(figsize=(10, 6))
443
+
444
+ # Calculate error rates
445
+ error_rates = []
446
+ for i, name in enumerate(CONFIG['class_names']):
447
+ mask = all_labels == i
448
+ errors = (all_preds[mask] != all_labels[mask]).sum()
449
+ total = mask.sum()
450
+ error_rate = 100 * errors / total if total > 0 else 0
451
+ error_rates.append(error_rate)
452
+
453
+ colors = ['#e74c3c', '#f39c12', '#27ae60']
454
+ bars = ax.bar(CONFIG['class_names'], error_rates, color=colors, edgecolor='black', linewidth=1.5)
455
+
456
+ ax.set_ylabel('Error Rate (%)', fontsize=12)
457
+ ax.set_title('Error Rate by True Class', fontsize=14)
458
+ ax.set_ylim(0, max(error_rates) * 1.2 if max(error_rates) > 0 else 10)
459
+ ax.grid(True, alpha=0.3, axis='y')
460
+
461
+ for bar, rate in zip(bars, error_rates):
462
+ ax.annotate(f'{rate:.1f}%',
463
+ xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
464
+ xytext=(0, 3),
465
+ textcoords="offset points",
466
+ ha='center', va='bottom', fontsize=12, fontweight='bold')
467
+
468
+ plt.tight_layout()
469
+ plt.savefig('plots/error_analysis.png', dpi=150, bbox_inches='tight')
470
+ print(" ✓ Saved: plots/error_analysis.png")
471
+
472
+ # ============================================================
473
+ # SAVE RESULTS
474
+ # ============================================================
475
+
476
+ print()
477
+ print("SAVING RESULTS")
478
+ print("-" * 70)
479
+
480
+ results = {
481
+ 'test_accuracy': float(test_acc),
482
+ 'negative_recall': float(negative_recall),
483
+ 'negative_precision': float(negative_precision),
484
+ 'negative_f1': float(negative_f1),
485
+ 'neutral_recall': float(report['Neutral']['recall'] * 100),
486
+ 'positive_recall': float(report['Positive']['recall'] * 100),
487
+ 'missed_struggling_students': int(false_negatives),
488
+ 'total_negative_cases': int(total_negatives),
489
+ 'missed_percentage': float(missed_pct),
490
+ 'low_confidence_predictions': int(low_confidence),
491
+ 'low_confidence_percentage': float(low_conf_pct),
492
+ 'macro_f1': float(report['macro avg']['f1-score'] * 100),
493
+ 'weighted_f1': float(report['weighted avg']['f1-score'] * 100),
494
+ }
495
+
496
+ # Save as JSON
497
+ with open(os.path.join(CONFIG['output_dir'], 'results.json'), 'w') as f:
498
+ json.dump(results, f, indent=2)
499
+ print(f" ✓ Saved: {CONFIG['output_dir']}/results.json")
500
+
501
+ # Save full results as PyTorch
502
+ full_results = {
503
+ **results,
504
+ 'config': CONFIG,
505
+ 'classification_report': report,
506
+ 'confusion_matrix': cm.tolist(),
507
+ 'all_predictions': all_preds.tolist(),
508
+ 'all_labels': all_labels.tolist(),
509
+ }
510
+ torch.save(full_results, os.path.join(CONFIG['output_dir'], 'results.pt'))
511
+ print(f" ✓ Saved: {CONFIG['output_dir']}/results.pt")
512
+
513
+ # ============================================================
514
+ # FINAL SUMMARY
515
+ # ============================================================
516
+
517
+ print()
518
+ print("=" * 70)
519
+ print("🎉 EVALUATION COMPLETE!")
520
+ print("=" * 70)
521
+ print()
522
+ print(" RESULTS SUMMARY:")
523
+ print(f" Test Accuracy: {test_acc:.2f}%")
524
+ print(f" Macro F1-Score: {report['macro avg']['f1-score']*100:.2f}%")
525
+ print(f" Weighted F1-Score: {report['weighted avg']['f1-score']*100:.2f}%")
526
+ print()
527
+ print(" PER-CLASS RECALL (most important for teachers):")
528
+ print(f" 🔴 Negative: {negative_recall:.1f}% ← Catches {100-missed_pct:.0f}% of struggling students")
529
+ print(f" 🟡 Neutral: {report['Neutral']['recall']*100:.1f}%")
530
+ print(f" 🟢 Positive: {report['Positive']['recall']*100:.1f}%")
531
+ print()
532
+ print(" KEY INSIGHTS:")
533
+ print(f" • {false_negatives:,} struggling students would be missed ({missed_pct:.1f}%)")
534
+ print(f" • {low_confidence:,} predictions need manual review ({low_conf_pct:.1f}%)")
535
+ print()
536
+ print(" FILES SAVED:")
537
+ print(f" • {CONFIG['output_dir']}/results.json")
538
+ print(f" • {CONFIG['output_dir']}/results.pt")
539
+ print(f" • plots/confusion_matrix_3class.png")
540
+ print(f" • plots/per_class_metrics_3class.png")
541
+ print(f" • plots/confidence_distribution.png")
542
+ print(f" • plots/error_analysis.png")
543
+ print()
544
+ print("=" * 70)
545
+
546
+
547
+ if __name__ == '__main__':
548
+ main()
course_feedback_nlp/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ tqdm
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ seaborn
6
+ matplotlib
7
+ transformers
course_feedback_nlp/test.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # save as predict.py
2
+
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
+
6
# Loaded (model, tokenizer, device) triples keyed by model directory, so that
# calling predict() in a loop does not reload the weights from disk each time.
_PREDICTOR_CACHE = {}


def _load_predictor(model_dir):
    """Return a cached ``(model, tokenizer, device)`` triple for *model_dir*."""
    if model_dir not in _PREDICTOR_CACHE:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        model = AutoModelForSequenceClassification.from_pretrained(
            model_dir, local_files_only=True
        ).to(device)
        model.eval()

        tokenizer = AutoTokenizer.from_pretrained(
            model_dir, local_files_only=True
        )

        _PREDICTOR_CACHE[model_dir] = (model, tokenizer, device)
    return _PREDICTOR_CACHE[model_dir]


def predict(text, model_dir='sentiment_model', max_length=96):
    """Predict a star rating for a single review text.

    The model and tokenizer are loaded from local files only (no download)
    and cached after the first call.

    Args:
        text: Review text to classify.
        model_dir: Directory holding the fine-tuned model and tokenizer.
        max_length: Token length the input is truncated/padded to.

    Returns:
        A dict with:
          - ``'rating'``: predicted class index plus one (the code assumes
            class ``i`` corresponds to rating ``i + 1``).
          - ``'confidence'``: probability of the predicted class, formatted
            as a percentage string.
          - ``'all_probs'``: mapping of every 1-based class number to its
            probability, formatted as a percentage string.
    """
    model, tokenizer, device = _load_predictor(model_dir)

    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
        padding='max_length'
    ).to(device)

    with torch.no_grad():
        # Mixed precision is only enabled on the GPU. Requesting CUDA autocast
        # while running on the CPU merely triggers a warning and is disabled.
        with torch.amp.autocast(device.type, enabled=(device.type == 'cuda')):
            outputs = model(**inputs)

    # Under autocast the logits may be half precision; compute the softmax in
    # float32 so the reported percentages are not needlessly rounded.
    logits = outputs.logits.float()
    probs = torch.softmax(logits, dim=1)[0]
    pred_index = logits.argmax(dim=1).item()

    return {
        'rating': pred_index + 1,  # 0-based class index -> 1-based rating
        'confidence': f'{probs[pred_index].item():.1%}',
        'all_probs': {i + 1: f'{p:.1%}' for i, p in enumerate(probs.tolist())}
    }
39
+
40
if __name__ == '__main__':
    # Smoke test: run the classifier on a few hand-written reviews that range
    # from clearly negative to clearly positive and print the predictions.
    tests = [
        "This course was amazing! Best I've ever taken!",
        "Terrible waste of time. Very boring.",
        "It was okay, nothing special.",
        "Good course but could be better organized.",
        "Absolutely fantastic! Highly recommend!"
    ]

    for text in tests:
        result = predict(text)
        # Only append an ellipsis when the preview really is truncated;
        # otherwise the output wrongly suggests that text was cut off.
        preview = text if len(text) <= 50 else f"{text[:50]}..."
        print(f"\n'{preview}'")
        print(f" → Predicted: {result['rating']} stars ({result['confidence']})")
course_feedback_nlp/train.py ADDED
@@ -0,0 +1,862 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Course Review Sentiment Model - Training Script
3
+ VRAM Optimized for AMD 7900 XTX (24GB)
4
+
5
+ PATCHES APPLIED:
6
+ - Class weights to handle imbalanced data (78.8% are 5-star reviews)
7
+ - Optimized batch_size=128 for better accuracy
8
+ - max_length=96 for faster training
9
+ - AMD crash protection and emergency checkpointing
10
+ - Periodic checkpoint saving (every epoch)
11
+ """
12
+
13
+ import torch
14
+ import torch.nn as nn
15
+ from torch.utils.data import Dataset, DataLoader, TensorDataset
16
+ import pandas as pd
17
+ import numpy as np
18
+ from sklearn.model_selection import train_test_split
19
+ from sklearn.metrics import classification_report, confusion_matrix
20
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
21
+ from transformers import get_linear_schedule_with_warmup
22
+ from tqdm.auto import tqdm
23
+ import matplotlib.pyplot as plt
24
+ import seaborn as sns
25
+ import os
26
+ import time
27
+ import gc
28
+ import warnings
29
+ warnings.filterwarnings('ignore')
30
+
31
+ # ============================================================
32
+ # AMD CRASH PROTECTION - Suppress problematic logging
33
+ # ============================================================
34
+ os.environ['AMD_LOG_LEVEL'] = '0'
35
+ os.environ['ROCM_LOG_LEVEL'] = '0'
36
+ os.environ['HIP_VISIBLE_DEVICES'] = '0'
37
+
38
+ # ============================================================
39
+ # CONFIGURATION
40
+ # ============================================================
41
+
42
+ CONFIG = {
43
+ 'data_path': 'Coursera_reviews.csv',
44
+ 'model_name': './distilbert-base-uncased',
45
+ 'output_dir': 'sentiment_model',
46
+ 'checkpoint_dir': 'checkpoints', # NEW: For periodic saves
47
+ 'max_length': 96, # CHANGED: 128 → 96 (faster, minimal accuracy loss)
48
+ 'batch_size': 128, # CHANGED: 512 → 128 (better accuracy per Run 3)
49
+ 'epochs': 5,
50
+ 'learning_rate': 2e-5,
51
+ 'weight_decay': 0.01,
52
+ 'warmup_ratio': 0.1,
53
+ 'train_size': 0.8,
54
+ 'val_size': 0.1,
55
+ 'test_size': 0.1,
56
+ 'seed': 42,
57
+ 'num_workers': 4,
58
+ 'pin_memory': True,
59
+ 'use_amp': True, # Mixed precision for speed
60
+ 'use_class_weights': True, # NEW: Address class imbalance
61
+ 'checkpoint_every_epoch': True, # NEW: Save checkpoint every epoch
62
+ }
63
+
64
+ # ============================================================
65
+ # MAIN FUNCTION
66
+ # ============================================================
67
+
68
+ def main():
69
+ # ============================================================
70
+ # SETUP
71
+ # ============================================================
72
+
73
def set_seed(seed):
    """Seed every random number generator the run may draw from.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU generator plus all CUDA devices) with the same value.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Imported locally so this helper does not depend on the module's
    # top-level import block.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without a GPU: the call is ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
77
+
78
+ set_seed(CONFIG['seed'])
79
+
80
+ # Device
81
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
82
+ print("=" * 70)
83
+ print("DEVICE INFORMATION")
84
+ print("=" * 70)
85
+ print(f" Device: {device}")
86
+ if torch.cuda.is_available():
87
+ print(f" GPU: {torch.cuda.get_device_name(0)}")
88
+ total_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
89
+ print(f" Memory: {total_mem:.2f} GB")
90
+ print("=" * 70)
91
+ print()
92
+
93
+ # Create directories early
94
+ os.makedirs(CONFIG['output_dir'], exist_ok=True)
95
+ os.makedirs(CONFIG['checkpoint_dir'], exist_ok=True)
96
+ os.makedirs('plots', exist_ok=True)
97
+
98
+ # ============================================================
99
+ # VERIFY LOCAL MODEL EXISTS
100
+ # ============================================================
101
+
102
+ print("=" * 70)
103
+ print("VERIFYING LOCAL MODEL")
104
+ print("=" * 70)
105
+
106
+ model_path = CONFIG['model_name']
107
+ if os.path.exists(model_path):
108
+ print(f" ✓ Model directory found: {model_path}")
109
+ else:
110
+ print(f" ✗ Model directory NOT found: {model_path}")
111
+ return
112
+
113
+ print("=" * 70)
114
+ print()
115
+
116
+ # ============================================================
117
+ # DATA LOADING
118
+ # ============================================================
119
+
120
+ print("=" * 70)
121
+ print("DATA LOADING")
122
+ print("=" * 70)
123
+
124
+ print("Loading data...")
125
+ df = pd.read_csv(CONFIG['data_path'])
126
+ print(f" Raw data shape: {df.shape}")
127
+
128
+ # Clean data
129
+ df = df.dropna(subset=['reviews', 'rating'])
130
+ df = df[df['reviews'].str.strip() != '']
131
+ df['rating'] = df['rating'].astype(int)
132
+ df = df[df['rating'].between(1, 5)]
133
+ df['label'] = df['rating'] - 1
134
+
135
+ print(f" Cleaned data shape: {df.shape}")
136
+ print(f"\n Rating distribution:")
137
+ for rating, count in df['rating'].value_counts().sort_index().items():
138
+ pct = 100 * count / len(df)
139
+ bar = "█" * int(pct / 2)
140
+ print(f" {rating} Star: {count:>8,} ({pct:>5.1f}%) {bar}")
141
+
142
+ # ============================================================
143
+ # CALCULATE CLASS WEIGHTS (Before deleting df!)
144
+ # ============================================================
145
+
146
+ if CONFIG['use_class_weights']:
147
+ print(f"\n Calculating class weights...")
148
+ class_counts = df['label'].value_counts().sort_index().values
149
+ # Inverse frequency weighting
150
+ class_weights = 1.0 / class_counts
151
+ # Normalize so weights sum to num_classes
152
+ class_weights = class_weights / class_weights.sum() * len(class_counts)
153
+ class_weights = torch.tensor(class_weights, dtype=torch.float32)
154
+
155
+ print(f" Class weights (to balance {class_counts[-1]/class_counts[0]:.1f}x imbalance):")
156
+ for i, (w, c) in enumerate(zip(class_weights, class_counts)):
157
+ print(f" {i+1} Star: weight={w:.4f} (count={c:,})")
158
+ else:
159
+ class_weights = None
160
+
161
+ print("=" * 70)
162
+ print()
163
+
164
+ # ============================================================
165
+ # TRAIN / VALIDATION / TEST SPLIT
166
+ # ============================================================
167
+
168
+ print("=" * 70)
169
+ print("DATA SPLITTING")
170
+ print("=" * 70)
171
+
172
+ X_temp, X_test, y_temp, y_test = train_test_split(
173
+ df['reviews'].values,
174
+ df['label'].values,
175
+ test_size=CONFIG['test_size'],
176
+ random_state=CONFIG['seed'],
177
+ stratify=df['label'].values
178
+ )
179
+
180
+ relative_val_size = CONFIG['val_size'] / (CONFIG['train_size'] + CONFIG['val_size'])
181
+
182
+ X_train, X_val, y_train, y_val = train_test_split(
183
+ X_temp,
184
+ y_temp,
185
+ test_size=relative_val_size,
186
+ random_state=CONFIG['seed'],
187
+ stratify=y_temp
188
+ )
189
+
190
+ print(f" Training samples: {len(X_train):>10,} ({100*len(X_train)/len(df):.1f}%)")
191
+ print(f" Validation samples: {len(X_val):>10,} ({100*len(X_val)/len(df):.1f}%)")
192
+ print(f" Test samples: {len(X_test):>10,} ({100*len(X_test)/len(df):.1f}%)")
193
+ print("=" * 70)
194
+ print()
195
+
196
+ # Now we can delete df
197
+ del df
198
+ gc.collect()
199
+
200
+ # ============================================================
201
+ # TOKENIZER
202
+ # ============================================================
203
+
204
+ print("Loading tokenizer...")
205
+ tokenizer = AutoTokenizer.from_pretrained(
206
+ CONFIG['model_name'],
207
+ local_files_only=True
208
+ )
209
+ print(f" ✓ Tokenizer loaded")
210
+ print()
211
+
212
+ # ============================================================
213
+ # PRE-TOKENIZE ALL DATA (Key optimization!)
214
+ # ============================================================
215
+
216
+ print("=" * 70)
217
+ print("PRE-TOKENIZING ALL DATA")
218
+ print("=" * 70)
219
+ print(" This runs once and stores tensors for fast loading...")
220
+ print()
221
+
222
def tokenize_batch(texts, desc="Tokenizing", batch_size=10000):
    """Tokenize *texts* chunk by chunk and return the stacked tensors.

    Relies on ``tokenizer`` and ``CONFIG['max_length']`` from the enclosing
    scope. Every text is truncated/padded to exactly ``max_length`` tokens,
    so the per-chunk results can be concatenated along dimension 0.

    Args:
        texts: Sequence of review strings (NumPy array, list or tuple).
        desc: Label shown on the tqdm progress bar.
        batch_size: Number of texts tokenized per tokenizer call; chunking
            keeps peak memory bounded on large datasets.

    Returns:
        Tuple ``(input_ids, attention_mask)`` of tensors, each with
        ``len(texts)`` rows and ``max_length`` columns.
    """
    max_length = CONFIG['max_length']

    # torch.cat() raises on an empty list, so an empty split is returned as
    # correctly shaped empty tensors instead of crashing.
    if len(texts) == 0:
        empty = torch.empty((0, max_length), dtype=torch.long)
        return empty, empty.clone()

    all_input_ids = []
    all_attention_masks = []

    for start in tqdm(range(0, len(texts), batch_size), desc=desc):
        chunk = texts[start:start + batch_size]
        # NumPy arrays are converted with .tolist() to get plain Python
        # strings; plain lists/tuples have no .tolist() and are copied as-is.
        batch_texts = chunk.tolist() if hasattr(chunk, 'tolist') else list(chunk)

        encodings = tokenizer(
            batch_texts,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt'
        )

        all_input_ids.append(encodings['input_ids'])
        all_attention_masks.append(encodings['attention_mask'])

    return (
        torch.cat(all_input_ids, dim=0),
        torch.cat(all_attention_masks, dim=0)
    )
247
+
248
+ # Tokenize train
249
+ print(" Tokenizing training data...")
250
+ train_input_ids, train_attention_masks = tokenize_batch(X_train, " Train")
251
+ train_labels = torch.tensor(y_train, dtype=torch.long)
252
+
253
+ # Tokenize validation
254
+ print(" Tokenizing validation data...")
255
+ val_input_ids, val_attention_masks = tokenize_batch(X_val, " Val")
256
+ val_labels = torch.tensor(y_val, dtype=torch.long)
257
+
258
+ # Tokenize test
259
+ print(" Tokenizing test data...")
260
+ test_input_ids, test_attention_masks = tokenize_batch(X_test, " Test")
261
+ test_labels = torch.tensor(y_test, dtype=torch.long)
262
+
263
+ # Free memory
264
+ del X_train, X_val, X_test, y_train, y_val, y_test, X_temp, y_temp
265
+ gc.collect()
266
+
267
+ print()
268
+ print(f" ✓ Train tensors: {train_input_ids.shape}")
269
+ print(f" ✓ Val tensors: {val_input_ids.shape}")
270
+ print(f" ✓ Test tensors: {test_input_ids.shape}")
271
+ print("=" * 70)
272
+ print()
273
+
274
+ # ============================================================
275
+ # CREATE TENSOR DATASETS (Fast!)
276
+ # ============================================================
277
+
278
+ train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
279
+ val_dataset = TensorDataset(val_input_ids, val_attention_masks, val_labels)
280
+ test_dataset = TensorDataset(test_input_ids, test_attention_masks, test_labels)
281
+
282
+ # ============================================================
283
+ # DATALOADERS
284
+ # ============================================================
285
+
286
+ print("Creating dataloaders...")
287
+
288
+ train_loader = DataLoader(
289
+ train_dataset,
290
+ batch_size=CONFIG['batch_size'],
291
+ shuffle=True,
292
+ num_workers=CONFIG['num_workers'],
293
+ pin_memory=CONFIG['pin_memory'],
294
+ persistent_workers=True # NEW: Keep workers alive between epochs
295
+ )
296
+
297
+ val_loader = DataLoader(
298
+ val_dataset,
299
+ batch_size=CONFIG['batch_size'],
300
+ shuffle=False,
301
+ num_workers=CONFIG['num_workers'],
302
+ pin_memory=CONFIG['pin_memory'],
303
+ persistent_workers=True
304
+ )
305
+
306
+ test_loader = DataLoader(
307
+ test_dataset,
308
+ batch_size=CONFIG['batch_size'],
309
+ shuffle=False,
310
+ num_workers=CONFIG['num_workers'],
311
+ pin_memory=CONFIG['pin_memory'],
312
+ persistent_workers=True
313
+ )
314
+
315
+ print(f" ✓ Train batches: {len(train_loader):,}")
316
+ print(f" ✓ Validation batches: {len(val_loader):,}")
317
+ print(f" ✓ Test batches: {len(test_loader):,}")
318
+ print()
319
+
320
+ # ============================================================
321
+ # MODEL
322
+ # ============================================================
323
+
324
+ print("Loading model...")
325
+ model = AutoModelForSequenceClassification.from_pretrained(
326
+ CONFIG['model_name'],
327
+ num_labels=5,
328
+ local_files_only=True
329
+ )
330
+ model = model.to(device)
331
+
332
+ total_params = sum(p.numel() for p in model.parameters())
333
+ print(f" ✓ Model loaded")
334
+ print(f" ✓ Total parameters: {total_params:,}")
335
+ print()
336
+
337
+ # ============================================================
338
+ # LOSS FUNCTION WITH CLASS WEIGHTS
339
+ # ============================================================
340
+
341
+ if CONFIG['use_class_weights'] and class_weights is not None:
342
+ class_weights = class_weights.to(device)
343
+ criterion = nn.CrossEntropyLoss(weight=class_weights)
344
+ print(f" ✓ Using weighted CrossEntropyLoss")
345
+ else:
346
+ criterion = nn.CrossEntropyLoss()
347
+ print(f" ✓ Using standard CrossEntropyLoss")
348
+ print()
349
+
350
+ # ============================================================
351
+ # OPTIMIZER & SCHEDULER
352
+ # ============================================================
353
+
354
+ optimizer = torch.optim.AdamW(
355
+ model.parameters(),
356
+ lr=CONFIG['learning_rate'],
357
+ weight_decay=CONFIG['weight_decay']
358
+ )
359
+
360
+ total_steps = len(train_loader) * CONFIG['epochs']
361
+ warmup_steps = int(total_steps * CONFIG['warmup_ratio'])
362
+
363
+ scheduler = get_linear_schedule_with_warmup(
364
+ optimizer,
365
+ num_warmup_steps=warmup_steps,
366
+ num_training_steps=total_steps
367
+ )
368
+
369
+ # Mixed Precision Scaler (for speed)
370
+ scaler = torch.amp.GradScaler('cuda') if CONFIG['use_amp'] else None
371
+
372
+ print("Optimizer & Scheduler configured:")
373
+ print(f" ✓ Optimizer: AdamW (lr={CONFIG['learning_rate']})")
374
+ print(f" ✓ Total steps: {total_steps:,}")
375
+ print(f" ✓ Warmup steps: {warmup_steps:,}")
376
+ print(f" ✓ Mixed Precision: {CONFIG['use_amp']}")
377
+ print()
378
+
379
+ # ============================================================
380
+ # HELPER FUNCTION: Save checkpoint
381
+ # ============================================================
382
+
383
+ def save_checkpoint(model, tokenizer, optimizer, scheduler, scaler, epoch,
384
+ val_acc, history, path, is_best=False):
385
+ """Save a training checkpoint"""
386
+ checkpoint = {
387
+ 'epoch': epoch,
388
+ 'model_state_dict': model.state_dict(),
389
+ 'optimizer_state_dict': optimizer.state_dict(),
390
+ 'scheduler_state_dict': scheduler.state_dict(),
391
+ 'scaler_state_dict': scaler.state_dict() if scaler else None,
392
+ 'val_accuracy': val_acc,
393
+ 'history': history,
394
+ 'config': CONFIG,
395
+ }
396
+ torch.save(checkpoint, path)
397
+
398
+ if is_best:
399
+ model.save_pretrained(CONFIG['output_dir'])
400
+ tokenizer.save_pretrained(CONFIG['output_dir'])
401
+
402
+ # ============================================================
403
+ # TRAINING LOOP (with crash protection)
404
+ # ============================================================
405
+
406
+ print("=" * 70)
407
+ print("TRAINING STARTED")
408
+ print("=" * 70)
409
+ print(f" Epochs: {CONFIG['epochs']}")
410
+ print(f" Batch size: {CONFIG['batch_size']}")
411
+ print(f" Max length: {CONFIG['max_length']}")
412
+ print(f" Device: {device}")
413
+ print(f" AMP: {CONFIG['use_amp']}")
414
+ print(f" Class weights: {CONFIG['use_class_weights']}")
415
+ print("=" * 70)
416
+ print()
417
+
418
+ best_val_acc = 0
419
+ history = {
420
+ 'train_loss': [],
421
+ 'train_acc': [],
422
+ 'val_loss': [],
423
+ 'val_acc': []
424
+ }
425
+
426
+ total_train_time = 0
427
+
428
+ # ============================================================
429
+ # WRAP IN TRY/EXCEPT FOR CRASH PROTECTION
430
+ # ============================================================
431
+
432
+ try:
433
+ for epoch in range(CONFIG['epochs']):
434
+ epoch_start_time = time.time()
435
+
436
+ # ==================== TRAINING ====================
437
+ model.train()
438
+ train_loss = 0
439
+ train_correct = 0
440
+ train_total = 0
441
+
442
+ train_pbar = tqdm(
443
+ train_loader,
444
+ desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [TRAIN]",
445
+ unit="batch",
446
+ ncols=120
447
+ )
448
+
449
+ for batch_idx, (input_ids, attention_mask, labels) in enumerate(train_pbar):
450
+ # Move to GPU with non_blocking for speed
451
+ input_ids = input_ids.to(device, non_blocking=True)
452
+ attention_mask = attention_mask.to(device, non_blocking=True)
453
+ labels = labels.to(device, non_blocking=True)
454
+
455
+ optimizer.zero_grad()
456
+
457
+ # Mixed precision forward pass
458
+ if CONFIG['use_amp']:
459
+ with torch.amp.autocast('cuda'):
460
+ outputs = model(
461
+ input_ids=input_ids,
462
+ attention_mask=attention_mask
463
+ )
464
+ # USE CUSTOM LOSS WITH CLASS WEIGHTS
465
+ logits = outputs.logits
466
+ loss = criterion(logits, labels)
467
+
468
+ scaler.scale(loss).backward()
469
+ scaler.unscale_(optimizer)
470
+ torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
471
+ scaler.step(optimizer)
472
+ scaler.update()
473
+ else:
474
+ outputs = model(
475
+ input_ids=input_ids,
476
+ attention_mask=attention_mask
477
+ )
478
+ logits = outputs.logits
479
+ loss = criterion(logits, labels)
480
+ loss.backward()
481
+ torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
482
+ optimizer.step()
483
+
484
+ scheduler.step()
485
+
486
+ train_loss += loss.item()
487
+ _, predicted = logits.max(1)
488
+ train_total += labels.size(0)
489
+ train_correct += predicted.eq(labels).sum().item()
490
+
491
+ running_loss = train_loss / (batch_idx + 1)
492
+ running_acc = 100 * train_correct / train_total
493
+ current_lr = scheduler.get_last_lr()[0]
494
+
495
+ # Show GPU memory usage
496
+ if torch.cuda.is_available():
497
+ mem_used = torch.cuda.memory_allocated() / 1e9
498
+ mem_total = torch.cuda.get_device_properties(0).total_memory / 1e9
499
+
500
+ train_pbar.set_postfix({
501
+ 'loss': f'{running_loss:.4f}',
502
+ 'acc': f'{running_acc:.2f}%',
503
+ 'lr': f'{current_lr:.2e}',
504
+ 'VRAM': f'{mem_used:.1f}/{mem_total:.1f}GB'
505
+ })
506
+
507
+ train_loss = train_loss / len(train_loader)
508
+ train_acc = 100 * train_correct / train_total
509
+
510
+ # ==================== VALIDATION ====================
511
+ model.eval()
512
+ val_loss = 0
513
+ val_correct = 0
514
+ val_total = 0
515
+
516
+ val_pbar = tqdm(
517
+ val_loader,
518
+ desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [VAL] ",
519
+ unit="batch",
520
+ ncols=120
521
+ )
522
+
523
+ with torch.no_grad():
524
+ for batch_idx, (input_ids, attention_mask, labels) in enumerate(val_pbar):
525
+ input_ids = input_ids.to(device, non_blocking=True)
526
+ attention_mask = attention_mask.to(device, non_blocking=True)
527
+ labels = labels.to(device, non_blocking=True)
528
+
529
+ if CONFIG['use_amp']:
530
+ with torch.amp.autocast('cuda'):
531
+ outputs = model(
532
+ input_ids=input_ids,
533
+ attention_mask=attention_mask
534
+ )
535
+ logits = outputs.logits
536
+ loss = criterion(logits, labels)
537
+ else:
538
+ outputs = model(
539
+ input_ids=input_ids,
540
+ attention_mask=attention_mask
541
+ )
542
+ logits = outputs.logits
543
+ loss = criterion(logits, labels)
544
+
545
+ val_loss += loss.item()
546
+ _, predicted = logits.max(1)
547
+ val_total += labels.size(0)
548
+ val_correct += predicted.eq(labels).sum().item()
549
+
550
+ running_loss = val_loss / (batch_idx + 1)
551
+ running_acc = 100 * val_correct / val_total
552
+
553
+ val_pbar.set_postfix({
554
+ 'loss': f'{running_loss:.4f}',
555
+ 'acc': f'{running_acc:.2f}%'
556
+ })
557
+
558
+ val_loss = val_loss / len(val_loader)
559
+ val_acc = 100 * val_correct / val_total
560
+
561
+ history['train_loss'].append(train_loss)
562
+ history['train_acc'].append(train_acc)
563
+ history['val_loss'].append(val_loss)
564
+ history['val_acc'].append(val_acc)
565
+
566
+ epoch_time = time.time() - epoch_start_time
567
+ total_train_time += epoch_time
568
+
569
+ # ==================== EPOCH SUMMARY ====================
570
+ print()
571
+ print("─" * 70)
572
+ print(f"EPOCH {epoch+1}/{CONFIG['epochs']} SUMMARY")
573
+ print("─" * 70)
574
+ print(f" {'Metric':<20} {'Train':>15} {'Validation':>15}")
575
+ print(f" {'-'*20} {'-'*15} {'-'*15}")
576
+ print(f" {'Loss':<20} {train_loss:>15.4f} {val_loss:>15.4f}")
577
+ print(f" {'Accuracy':<20} {train_acc:>14.2f}% {val_acc:>14.2f}%")
578
+ print(f" {'-'*20} {'-'*15} {'-'*15}")
579
+ print(f" {'Time':<20} {epoch_time:>14.1f}s")
580
+ print(f" {'Samples/sec':<20} {len(train_dataset)/epoch_time:>14.1f}")
581
+
582
+ # ==================== SAVE CHECKPOINT ====================
583
+ is_best = val_acc > best_val_acc
584
+
585
+ if is_best:
586
+ best_val_acc = val_acc
587
+
588
+ # Always save periodic checkpoint
589
+ if CONFIG['checkpoint_every_epoch']:
590
+ checkpoint_path = os.path.join(
591
+ CONFIG['checkpoint_dir'],
592
+ f'checkpoint_epoch_{epoch+1}.pt'
593
+ )
594
+ save_checkpoint(
595
+ model, tokenizer, optimizer, scheduler, scaler,
596
+ epoch + 1, val_acc, history, checkpoint_path, is_best=is_best
597
+ )
598
+ print(f"\n 💾 Checkpoint saved: {checkpoint_path}")
599
+
600
+ if is_best:
601
+ # Also save as best model
602
+ torch.save({
603
+ 'epoch': epoch + 1,
604
+ 'best_val_accuracy': best_val_acc,
605
+ 'config': CONFIG,
606
+ 'history': history
607
+ }, os.path.join(CONFIG['output_dir'], 'training_info.pt'))
608
+
609
+ print(f" 🏆 NEW BEST MODEL SAVED! Val Accuracy: {best_val_acc:.2f}%")
610
+ else:
611
+ print(f"\n ℹ️ Best Val Accuracy so far: {best_val_acc:.2f}%")
612
+
613
+ print("─" * 70)
614
+ print()
615
+
616
+ except Exception as e:
617
+ # ============================================================
618
+ # EMERGENCY SAVE ON CRASH
619
+ # ============================================================
620
+ print()
621
+ print("!" * 70)
622
+ print("⚠️ ERROR OCCURRED - SAVING EMERGENCY CHECKPOINT")
623
+ print("!" * 70)
624
+ print(f" Error: {e}")
625
+
626
+ emergency_dir = CONFIG['output_dir'] + '_emergency'
627
+ os.makedirs(emergency_dir, exist_ok=True)
628
+
629
+ try:
630
+ model.save_pretrained(emergency_dir)
631
+ tokenizer.save_pretrained(emergency_dir)
632
+
633
+ torch.save({
634
+ 'epoch': epoch + 1 if 'epoch' in dir() else 0,
635
+ 'history': history,
636
+ 'config': CONFIG,
637
+ 'error': str(e)
638
+ }, os.path.join(emergency_dir, 'emergency_checkpoint.pt'))
639
+
640
+ print(f" ✓ Emergency checkpoint saved to: {emergency_dir}")
641
+ except Exception as save_error:
642
+ print(f" ✗ Failed to save emergency checkpoint: {save_error}")
643
+
644
+ print("!" * 70)
645
+ raise # Re-raise the exception
646
+
647
+ print("=" * 70)
648
+ print("TRAINING COMPLETE")
649
+ print("=" * 70)
650
+ print(f" Total training time: {total_train_time/60:.1f} minutes")
651
+ print(f" Best Val Accuracy: {best_val_acc:.2f}%")
652
+ print("=" * 70)
653
+ print()
654
+
655
+ # ============================================================
656
+ # FINAL TEST EVALUATION
657
+ # ============================================================
658
+
659
+ print("=" * 70)
660
+ print("FINAL TEST EVALUATION")
661
+ print("=" * 70)
662
+ print("Loading best model...")
663
+
664
+ model = AutoModelForSequenceClassification.from_pretrained(
665
+ CONFIG['output_dir'],
666
+ local_files_only=True
667
+ )
668
+ model = model.to(device)
669
+ model.eval()
670
+
671
+ # Use standard loss for test evaluation (no class weights)
672
+ test_criterion = nn.CrossEntropyLoss()
673
+
674
+ test_loss = 0
675
+ test_correct = 0
676
+ test_total = 0
677
+ all_preds = []
678
+ all_labels = []
679
+
680
+ test_pbar = tqdm(test_loader, desc="Testing", unit="batch", ncols=120)
681
+
682
+ with torch.no_grad():
683
+ for batch_idx, (input_ids, attention_mask, labels) in enumerate(test_pbar):
684
+ input_ids = input_ids.to(device, non_blocking=True)
685
+ attention_mask = attention_mask.to(device, non_blocking=True)
686
+ labels = labels.to(device, non_blocking=True)
687
+
688
+ if CONFIG['use_amp']:
689
+ with torch.amp.autocast('cuda'):
690
+ outputs = model(
691
+ input_ids=input_ids,
692
+ attention_mask=attention_mask
693
+ )
694
+ logits = outputs.logits
695
+ loss = test_criterion(logits, labels)
696
+ else:
697
+ outputs = model(
698
+ input_ids=input_ids,
699
+ attention_mask=attention_mask
700
+ )
701
+ logits = outputs.logits
702
+ loss = test_criterion(logits, labels)
703
+
704
+ test_loss += loss.item()
705
+ _, predicted = logits.max(1)
706
+ test_total += labels.size(0)
707
+ test_correct += predicted.eq(labels).sum().item()
708
+
709
+ all_preds.extend(predicted.cpu().numpy())
710
+ all_labels.extend(labels.cpu().numpy())
711
+
712
+ test_pbar.set_postfix({
713
+ 'loss': f'{test_loss/(batch_idx+1):.4f}',
714
+ 'acc': f'{100*test_correct/test_total:.2f}%'
715
+ })
716
+
717
+ test_loss = test_loss / len(test_loader)
718
+ test_acc = 100 * test_correct / test_total
719
+ all_preds = np.array(all_preds)
720
+ all_labels = np.array(all_labels)
721
+
722
+ within_one = np.mean(np.abs(all_preds - all_labels) <= 1) * 100
723
+
724
+ print()
725
+ print("─" * 70)
726
+ print("TEST RESULTS")
727
+ print("─" * 70)
728
+ print(f" Test Loss: {test_loss:.4f}")
729
+ print(f" Test Accuracy: {test_acc:.2f}%")
730
+ print(f" Within ±1 Star: {within_one:.2f}%")
731
+ print("─" * 70)
732
+ print()
733
+
734
+ print("CLASSIFICATION REPORT")
735
+ print("─" * 70)
736
+ report = classification_report(
737
+ all_labels,
738
+ all_preds,
739
+ target_names=['1 Star', '2 Star', '3 Star', '4 Star', '5 Star'],
740
+ digits=3,
741
+ output_dict=True
742
+ )
743
+ print(classification_report(
744
+ all_labels,
745
+ all_preds,
746
+ target_names=['1 Star', '2 Star', '3 Star', '4 Star', '5 Star'],
747
+ digits=3
748
+ ))
749
+
750
+ # ============================================================
751
+ # PLOTS
752
+ # ============================================================
753
+
754
+ fig, axes = plt.subplots(1, 2, figsize=(14, 5))
755
+ epochs_range = range(1, len(history['train_loss']) + 1)
756
+
757
+ axes[0].plot(epochs_range, history['train_loss'], 'b-o', label='Train', linewidth=2)
758
+ axes[0].plot(epochs_range, history['val_loss'], 'r-o', label='Val', linewidth=2)
759
+ axes[0].set_xlabel('Epoch')
760
+ axes[0].set_ylabel('Loss')
761
+ axes[0].set_title('Loss (with Class Weights)' if CONFIG['use_class_weights'] else 'Loss')
762
+ axes[0].legend()
763
+ axes[0].grid(True, alpha=0.3)
764
+
765
+ axes[1].plot(epochs_range, history['train_acc'], 'b-o', label='Train', linewidth=2)
766
+ axes[1].plot(epochs_range, history['val_acc'], 'r-o', label='Val', linewidth=2)
767
+ axes[1].set_xlabel('Epoch')
768
+ axes[1].set_ylabel('Accuracy (%)')
769
+ axes[1].set_title('Accuracy')
770
+ axes[1].legend()
771
+ axes[1].grid(True, alpha=0.3)
772
+
773
+ plt.tight_layout()
774
+ plt.savefig('plots/training_history.png', dpi=150)
775
+ print("✓ Saved: plots/training_history.png")
776
+
777
+ fig, axes = plt.subplots(1, 2, figsize=(16, 6))
778
+ cm = confusion_matrix(all_labels, all_preds)
779
+ labels_names = ['1 Star', '2 Star', '3 Star', '4 Star', '5 Star']
780
+
781
+ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
782
+ xticklabels=labels_names, yticklabels=labels_names, ax=axes[0])
783
+ axes[0].set_xlabel('Predicted')
784
+ axes[0].set_ylabel('Actual')
785
+ axes[0].set_title('Confusion Matrix (Counts)')
786
+
787
+ cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
788
+ sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
789
+ xticklabels=labels_names, yticklabels=labels_names, ax=axes[1])
790
+ axes[1].set_xlabel('Predicted')
791
+ axes[1].set_ylabel('Actual')
792
+ axes[1].set_title('Confusion Matrix (Normalized)')
793
+
794
+ plt.tight_layout()
795
+ plt.savefig('plots/confusion_matrix.png', dpi=150)
796
+ print("✓ Saved: plots/confusion_matrix.png")
797
+
798
+ # ============================================================
799
+ # PER-CLASS RECALL COMPARISON PLOT (NEW!)
800
+ # ============================================================
801
+
802
+ fig, ax = plt.subplots(figsize=(10, 6))
803
+ classes = ['1 Star', '2 Star', '3 Star', '4 Star', '5 Star']
804
+ recalls = [report[c]['recall'] * 100 for c in classes]
805
+
806
+ bars = ax.bar(classes, recalls, color=['#ff6b6b', '#ffa94d', '#ffd43b', '#69db7c', '#4dabf7'])
807
+ ax.axhline(y=50, color='red', linestyle='--', alpha=0.5, label='50% threshold')
808
+ ax.axhline(y=75, color='green', linestyle='--', alpha=0.5, label='75% threshold')
809
+
810
+ for bar, recall in zip(bars, recalls):
811
+ ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
812
+ f'{recall:.1f}%', ha='center', va='bottom', fontsize=11)
813
+
814
+ ax.set_ylabel('Recall (%)')
815
+ ax.set_title('Per-Class Recall (Higher = Better at detecting this class)')
816
+ ax.set_ylim(0, 105)
817
+ ax.legend()
818
+ ax.grid(True, alpha=0.3, axis='y')
819
+
820
+ plt.tight_layout()
821
+ plt.savefig('plots/per_class_recall.png', dpi=150)
822
+ print("✓ Saved: plots/per_class_recall.png")
823
+
824
+ # ============================================================
825
+ # SAVE RESULTS
826
+ # ============================================================
827
+
828
+ results = {
829
+ 'best_val_accuracy': best_val_acc,
830
+ 'test_accuracy': test_acc,
831
+ 'test_within_one': within_one,
832
+ 'history': history,
833
+ 'config': CONFIG,
834
+ 'train_time_minutes': total_train_time / 60,
835
+ 'classification_report': report,
836
+ 'confusion_matrix': cm.tolist()
837
+ }
838
+ torch.save(results, os.path.join(CONFIG['output_dir'], 'results.pt'))
839
+
840
+ print()
841
+ print("=" * 70)
842
+ print("🎉 ALL DONE!")
843
+ print("=" * 70)
844
+ print(f" Best Val Accuracy: {best_val_acc:.2f}%")
845
+ print(f" Test Accuracy: {test_acc:.2f}%")
846
+ print(f" Within ±1 Star: {within_one:.2f}%")
847
+ print(f" Training Time: {total_train_time/60:.1f} minutes")
848
+ print()
849
+ print(" Per-Class Recall:")
850
+ for c in classes:
851
+ recall = report[c]['recall'] * 100
852
+ indicator = "✓" if recall >= 60 else "⚠️" if recall >= 40 else "✗"
853
+ print(f" {indicator} {c}: {recall:.1f}%")
854
+ print("=" * 70)
855
+
856
+
857
+ # ============================================================
858
+ # ENTRY POINT
859
+ # ============================================================
860
+
861
+ if __name__ == '__main__':
862
+ main()
course_feedback_nlp/train_3_classes.py ADDED
@@ -0,0 +1,872 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Student Feedback Sentiment Model - Training Script
3
+ ==================================================
4
+ Optimized for Teacher/Agent use case:
5
+ - 3 classes: Negative, Neutral, Positive
6
+ - High recall on negative feedback (don't miss struggling students)
7
+ - Confidence scores for uncertainty
8
+ - Fast inference for agent integration
9
+
10
+ FIXED: save_checkpoint now properly preserves model config.json
11
+ """
12
+
13
+ import torch
14
+ import torch.nn as nn
15
+ import torch.nn.functional as F
16
+ from torch.utils.data import Dataset, DataLoader, TensorDataset
17
+ import pandas as pd
18
+ import numpy as np
19
+ from sklearn.model_selection import train_test_split
20
+ from sklearn.metrics import classification_report, confusion_matrix
21
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
22
+ from transformers import get_linear_schedule_with_warmup
23
+ from tqdm.auto import tqdm
24
+ import matplotlib.pyplot as plt
25
+ import seaborn as sns
26
+ import os
27
+ import time
28
+ import gc
29
+ import json
30
+ import warnings
31
+ warnings.filterwarnings('ignore')
32
+
33
+ # ============================================================
34
+ # AMD CRASH PROTECTION
35
+ # ============================================================
36
+ os.environ['AMD_LOG_LEVEL'] = '0'
37
+ os.environ['ROCM_LOG_LEVEL'] = '0'
38
+ os.environ['HIP_VISIBLE_DEVICES'] = '0'
39
+
40
+ # ============================================================
41
+ # CONFIGURATION - OPTIMIZED FOR TEACHER USE CASE
42
+ # ============================================================
43
+
44
+ CONFIG = {
45
+ # ==================== DATA ====================
46
+ 'data_path': 'Coursera_reviews.csv',
47
+ 'model_name': './distilbert-base-uncased',
48
+ 'output_dir': 'teacher_sentiment_model',
49
+ 'checkpoint_dir': 'checkpoints_teacher',
50
+
51
+ # ==================== CLASS MAPPING ====================
52
+ # Map 5-star ratings to 3 classes
53
+ 'num_classes': 3,
54
+ 'class_names': ['Negative', 'Neutral', 'Positive'],
55
+ 'class_mapping': {
56
+ 0: 0, # 1-star → Negative (0)
57
+ 1: 0, # 2-star → Negative (0)
58
+ 2: 1, # 3-star → Neutral (1)
59
+ 3: 2, # 4-star → Positive (2)
60
+ 4: 2, # 5-star → Positive (2)
61
+ },
62
+
63
+ # ==================== TOKENIZATION ====================
64
+ 'max_length': 96,
65
+
66
+ # ==================== TRAINING ====================
67
+ 'batch_size': 128,
68
+ 'gradient_accumulation_steps': 2,
69
+ 'epochs': 7,
70
+ 'learning_rate': 2e-5,
71
+ 'weight_decay': 0.01,
72
+ 'warmup_ratio': 0.06,
73
+ 'max_grad_norm': 1.0,
74
+
75
+ # ==================== SCHEDULER ====================
76
+ 'scheduler_type': 'cosine',
77
+ 'cosine_min_lr_ratio': 0.01,
78
+
79
+ # ==================== LOSS FUNCTION ====================
80
+ 'loss_type': 'focal', # Focal loss to focus on hard examples
81
+ 'focal_gamma': 2.0,
82
+ 'label_smoothing': 0.05, # Light smoothing for calibration
83
+
84
+ # ==================== CLASS IMBALANCE ====================
85
+ # IMPORTANT: Weight negative class higher - we don't want to miss struggling students!
86
+ 'use_class_weights': True,
87
+ 'class_weight_power': 0.7, # Moderate-high weighting for minorities
88
+ 'negative_class_boost': 1.5, # Extra boost for negative class (teacher priority)
89
+
90
+ # ==================== EARLY STOPPING ====================
91
+ 'early_stopping': True,
92
+ 'early_stopping_patience': 3,
93
+ 'early_stopping_metric': 'val_loss',
94
+
95
+ # ==================== HARDWARE ====================
96
+ 'seed': 42,
97
+ 'num_workers': 4,
98
+ 'pin_memory': True,
99
+ 'use_amp': True,
100
+
101
+ # ==================== CHECKPOINTING ====================
102
+ 'checkpoint_every_epoch': True,
103
+ 'save_total_limit': 3,
104
+
105
+ # ==================== DATA SPLIT ====================
106
+ 'train_size': 0.8,
107
+ 'val_size': 0.1,
108
+ 'test_size': 0.1,
109
+ }
110
+
111
+
112
+ # ============================================================
113
+ # CUSTOM LOSS FUNCTIONS
114
+ # ============================================================
115
+
116
class FocalLoss(nn.Module):
    """
    Focal loss for multi-class classification, with optional label smoothing.

    Each sample's cross-entropy term is scaled by ``(1 - p_t) ** gamma``,
    where ``p_t`` is the softmax probability assigned to the true class, so
    samples the model already classifies confidently contribute less to the
    mean loss.

    Args:
        num_classes: Number of classes; only used to spread the smoothing
            mass over the ``num_classes - 1`` non-target classes.
        gamma: Exponent applied to ``1 - p_t``.
        alpha: Optional 1-D tensor of per-class weights, indexed by target
            label. Stored as a module buffer when provided.
        label_smoothing: Probability mass removed from the true class and
            shared equally among the other classes. ``0`` disables smoothing.
    """

    def __init__(self, num_classes=3, gamma=2.0, alpha=None, label_smoothing=0.0):
        super().__init__()
        self.num_classes = num_classes
        self.gamma = gamma
        self.label_smoothing = label_smoothing

        if alpha is None:
            self.alpha = None
        else:
            self.register_buffer('alpha', alpha)

    def forward(self, logits, targets):
        """
        Return the mean focal loss.

        ``logits`` is indexed as (sample, class) and ``targets`` holds one
        integer class index per sample.
        """
        target_index = targets.unsqueeze(1)

        # Probability the model assigns to the correct class of each sample.
        true_class_prob = F.softmax(logits, dim=-1).gather(1, target_index).squeeze(1)
        modulator = (1 - true_class_prob) ** self.gamma

        if self.label_smoothing > 0:
            # Soft targets: `1 - eps` on the true class, `eps / (K - 1)` elsewhere.
            on_value = 1.0 - self.label_smoothing
            off_value = self.label_smoothing / (self.num_classes - 1)
            hard = torch.zeros_like(logits).scatter_(1, target_index, 1)
            soft = hard * on_value + (1 - hard) * off_value
            per_sample_ce = -(soft * F.log_softmax(logits, dim=-1)).sum(dim=-1)
        else:
            per_sample_ce = F.cross_entropy(logits, targets, reduction='none')

        per_sample = modulator * per_sample_ce

        if self.alpha is not None:
            # Per-class weight looked up by each sample's true label.
            per_sample = self.alpha[targets] * per_sample

        return per_sample.mean()
157
+
158
+
159
+ # ============================================================
160
+ # UTILITY FUNCTIONS
161
+ # ============================================================
162
+
163
def set_seed(seed):
    """
    Seed every random number generator the training script can draw from.

    Seeds Python's ``random`` module, NumPy, and PyTorch (including all CUDA
    devices), and requests deterministic cuDNN kernels.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Imported locally: the module-level imports do not include `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning may pick different kernels from run to run; keep it
    # off so the deterministic flag above is not undermined.
    torch.backends.cudnn.benchmark = False
168
+
169
+
170
def get_scheduler(optimizer, scheduler_type, total_steps, warmup_steps, config):
    """
    Build the learning-rate scheduler used for training.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        scheduler_type: ``'cosine'`` selects linear warmup followed by cosine
            decay; any other value selects
            ``get_linear_schedule_with_warmup``.
        total_steps: Total number of scheduler steps planned for the run.
        warmup_steps: Number of initial steps spent ramping up linearly
            from 0.
        config: Mapping read for ``'cosine_min_lr_ratio'`` (default 0.01),
            the floor of the cosine multiplier.

    Returns:
        A ``LambdaLR`` for the cosine schedule, otherwise the result of
        ``get_linear_schedule_with_warmup``.
    """
    if scheduler_type != 'cosine':
        return get_linear_schedule_with_warmup(optimizer, warmup_steps, total_steps)

    # Imported locally: the module-level imports do not include `math`.
    import math

    min_lr_ratio = config.get('cosine_min_lr_ratio', 0.01)
    decay_steps = max(1, total_steps - warmup_steps)

    def lr_lambda(current_step):
        if current_step < warmup_steps:
            return float(current_step) / float(max(1, warmup_steps))
        # Clamp to 1.0: without it, stepping past `total_steps` walks back up
        # the cosine and the learning rate rises again.
        progress = min(1.0, float(current_step - warmup_steps) / float(decay_steps))
        # math.cos (not np.cos) keeps the multiplier a built-in float, so the
        # learning rate stored in optimizer.param_groups is not a NumPy scalar.
        return float(max(min_lr_ratio, 0.5 * (1.0 + math.cos(math.pi * progress))))

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
183
+
184
+
185
def save_checkpoint(model, tokenizer, optimizer, scheduler, scaler, epoch,
                    val_acc, val_loss, history, config, path, is_best=False):
    """
    Write a training checkpoint and, for the best epoch, export the model.

    The file at ``path`` always receives the model, optimizer and scheduler
    state dicts (plus the scaler's, when a scaler is given) together with the
    epoch, validation metrics, history and config.

    When ``is_best`` is true the model and tokenizer are also exported to
    ``config['output_dir']``. The training settings go to a separate
    ``training_config.json`` so the exported ``config.json`` (which carries
    the ``model_type`` needed for loading) is not overwritten; the label
    mappings are then added to that ``config.json`` through ``AutoConfig``.
    """
    state = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'scaler_state_dict': scaler.state_dict() if scaler else None,
        'val_accuracy': val_acc,
        'val_loss': val_loss,
        'history': history,
        'config': config,
    }
    torch.save(state, path)

    if not is_best:
        return

    export_dir = config['output_dir']
    class_names = config['class_names']

    # Exporting the model writes a config.json that includes model_type.
    model.save_pretrained(export_dir)
    tokenizer.save_pretrained(export_dir)

    # Training-specific settings live in their own file, never in config.json.
    training_settings = {
        'num_classes': config['num_classes'],
        'class_names': class_names,
        # JSON object keys must be strings.
        'class_mapping': {str(src): dst for src, dst in config['class_mapping'].items()},
        'max_length': config['max_length'],
    }
    with open(os.path.join(export_dir, 'training_config.json'), 'w') as handle:
        json.dump(training_settings, handle, indent=2)

    # Reload the exported config and attach the label mappings to it.
    exported_config = AutoConfig.from_pretrained(export_dir)
    exported_config.num_labels = config['num_classes']
    id2label = dict(enumerate(class_names))
    exported_config.id2label = id2label
    exported_config.label2id = {label: idx for idx, label in id2label.items()}
    exported_config.save_pretrained(export_dir)
229
+
230
+
231
def cleanup_old_checkpoints(checkpoint_dir, save_total_limit):
    """
    Delete the oldest per-epoch checkpoints so at most a fixed number remain.

    Only files named ``checkpoint_epoch_<N>.pt`` with an integer ``<N>`` are
    considered. Age is decided by ``<N>`` compared numerically, so epoch 10
    counts as newer than epoch 9 (a plain filename sort would put
    ``checkpoint_epoch_10.pt`` before ``checkpoint_epoch_2.pt`` and remove
    the wrong files). Any other file in the directory is left untouched.

    Args:
        checkpoint_dir: Directory containing the checkpoint files.
        save_total_limit: Maximum number of epoch checkpoints to keep;
            ``None`` or a value <= 0 disables cleanup.
    """
    if save_total_limit is None or save_total_limit <= 0:
        return

    prefix, suffix = 'checkpoint_epoch_', '.pt'
    by_epoch = []
    for name in os.listdir(checkpoint_dir):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        try:
            epoch = int(name[len(prefix):-len(suffix)])
        except ValueError:
            # Not a numbered epoch checkpoint; never delete it.
            continue
        by_epoch.append((epoch, name))

    by_epoch.sort()
    # Everything except the `save_total_limit` highest epochs is stale.
    for _, name in by_epoch[:-save_total_limit]:
        os.remove(os.path.join(checkpoint_dir, name))
241
+
242
+
243
def tokenize_batch(texts, tokenizer, max_length, desc="Tokenizing", batch_size=10000):
    """
    Tokenize texts in chunks and return padded id / attention-mask tensors.

    Args:
        texts: Sequence of strings supporting ``len`` and slicing, e.g. a
            NumPy array or a plain list.
        tokenizer: Callable invoked per chunk with ``truncation=True``,
            ``padding='max_length'``, ``max_length=max_length`` and
            ``return_tensors='pt'``; its result is indexed by
            ``'input_ids'`` and ``'attention_mask'``.
        max_length: Length every sequence is truncated/padded to.
        desc: Label shown on the progress bar.
        batch_size: Number of texts passed to the tokenizer per call. The
            default matches the previously hard-coded chunk size.

    Returns:
        Tuple ``(input_ids, attention_mask)``, each the per-chunk tensors
        concatenated along dim 0. For empty ``texts`` both are empty
        ``(0, max_length)`` integer tensors.
    """
    # torch.cat raises on an empty list, so handle "nothing to tokenize" up front.
    if len(texts) == 0:
        empty = torch.empty((0, max_length), dtype=torch.long)
        return empty, empty.clone()

    all_input_ids = []
    all_attention_masks = []

    for start in tqdm(range(0, len(texts), batch_size), desc=desc):
        chunk = texts[start:start + batch_size]
        # Arrays expose .tolist(); plain Python sequences are copied as-is.
        chunk = chunk.tolist() if hasattr(chunk, 'tolist') else list(chunk)
        encodings = tokenizer(
            chunk,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt'
        )
        all_input_ids.append(encodings['input_ids'])
        all_attention_masks.append(encodings['attention_mask'])

    return torch.cat(all_input_ids, dim=0), torch.cat(all_attention_masks, dim=0)
261
+
262
+
263
+ # ============================================================
264
+ # MAIN FUNCTION
265
+ # ============================================================
266
+
267
+ def main():
268
+ set_seed(CONFIG['seed'])
269
+
270
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
271
+
272
+ print("=" * 70)
273
+ print("STUDENT FEEDBACK SENTIMENT MODEL")
274
+ print("Optimized for Teacher/Agent Use Case")
275
+ print("=" * 70)
276
+ print()
277
+ print("TARGET CLASSES:")
278
+ print(" 🔴 Negative (1-2 stars) → 'Needs Attention'")
279
+ print(" 🟡 Neutral (3 stars) → 'Mixed/Unclear'")
280
+ print(" 🟢 Positive (4-5 stars) → 'Satisfied'")
281
+ print()
282
+ print(f"Device: {device}")
283
+ if torch.cuda.is_available():
284
+ print(f"GPU: {torch.cuda.get_device_name(0)}")
285
+ print("=" * 70)
286
+ print()
287
+
288
+ # Create directories
289
+ os.makedirs(CONFIG['output_dir'], exist_ok=True)
290
+ os.makedirs(CONFIG['checkpoint_dir'], exist_ok=True)
291
+ os.makedirs('plots', exist_ok=True)
292
+
293
+ # ============================================================
294
+ # DATA LOADING & PREPROCESSING
295
+ # ============================================================
296
+
297
+ print("LOADING DATA")
298
+ print("-" * 70)
299
+
300
+ df = pd.read_csv(CONFIG['data_path'])
301
+ print(f"Raw data: {len(df):,} samples")
302
+
303
+ # Clean
304
+ df = df.dropna(subset=['reviews', 'rating'])
305
+ df = df[df['reviews'].str.strip() != '']
306
+ df['rating'] = df['rating'].astype(int)
307
+ df = df[df['rating'].between(1, 5)]
308
+
309
+ # Original 5-class labels
310
+ df['label_5class'] = df['rating'] - 1
311
+
312
+ # Map to 3 classes
313
+ df['label'] = df['label_5class'].map(CONFIG['class_mapping'])
314
+
315
+ print(f"Cleaned data: {len(df):,} samples")
316
+ print()
317
+
318
+ # Show original distribution
319
+ print("Original 5-class distribution:")
320
+ for rating in range(1, 6):
321
+ count = (df['rating'] == rating).sum()
322
+ pct = 100 * count / len(df)
323
+ print(f" {rating} Star: {count:>8,} ({pct:>5.1f}%)")
324
+ print()
325
+
326
+ # Show new 3-class distribution
327
+ print("New 3-class distribution:")
328
+ class_counts_3 = []
329
+ for label, name in enumerate(CONFIG['class_names']):
330
+ count = (df['label'] == label).sum()
331
+ pct = 100 * count / len(df)
332
+ class_counts_3.append(count)
333
+ emoji = ['🔴', '🟡', '🟢'][label]
334
+ print(f" {emoji} {name}: {count:>8,} ({pct:>5.1f}%)")
335
+ print()
336
+
337
+ # ============================================================
338
+ # CALCULATE CLASS WEIGHTS
339
+ # ============================================================
340
+
341
+ if CONFIG['use_class_weights']:
342
+ print("Calculating class weights...")
343
+ class_counts = np.array(class_counts_3)
344
+
345
+ # Inverse frequency
346
+ weights = 1.0 / class_counts
347
+ weights = weights / weights.sum() * len(weights)
348
+
349
+ # Apply power scaling
350
+ power = CONFIG['class_weight_power']
351
+ weights = weights ** power
352
+ weights = weights / weights.sum() * len(weights)
353
+
354
+ # Extra boost for negative class (teacher priority!)
355
+ negative_boost = CONFIG.get('negative_class_boost', 1.0)
356
+ weights[0] = weights[0] * negative_boost
357
+
358
+ # Re-normalize
359
+ weights = weights / weights.sum() * len(weights)
360
+
361
+ class_weights = torch.tensor(weights, dtype=torch.float32)
362
+
363
+ print("Class weights (higher = more important):")
364
+ for i, (name, w) in enumerate(zip(CONFIG['class_names'], class_weights)):
365
+ bar = "█" * int(w * 15)
366
+ boost_note = " ← BOOSTED (teacher priority)" if i == 0 else ""
367
+ print(f" {name}: {w:.4f} {bar}{boost_note}")
368
+ print()
369
+ else:
370
+ class_weights = None
371
+
372
+ # ============================================================
373
+ # TRAIN / VAL / TEST SPLIT
374
+ # ============================================================
375
+
376
+ print("SPLITTING DATA")
377
+ print("-" * 70)
378
+
379
+ X_temp, X_test, y_temp, y_test = train_test_split(
380
+ df['reviews'].values, df['label'].values,
381
+ test_size=CONFIG['test_size'],
382
+ random_state=CONFIG['seed'],
383
+ stratify=df['label'].values
384
+ )
385
+
386
+ val_ratio = CONFIG['val_size'] / (CONFIG['train_size'] + CONFIG['val_size'])
387
+ X_train, X_val, y_train, y_val = train_test_split(
388
+ X_temp, y_temp,
389
+ test_size=val_ratio,
390
+ random_state=CONFIG['seed'],
391
+ stratify=y_temp
392
+ )
393
+
394
+ print(f"Train: {len(X_train):,} | Val: {len(X_val):,} | Test: {len(X_test):,}")
395
+ print()
396
+
397
+ del df
398
+ gc.collect()
399
+
400
+ # ============================================================
401
+ # TOKENIZATION
402
+ # ============================================================
403
+
404
+ print("TOKENIZATION")
405
+ print("-" * 70)
406
+
407
+ tokenizer = AutoTokenizer.from_pretrained(CONFIG['model_name'], local_files_only=True)
408
+
409
+ train_ids, train_masks = tokenize_batch(X_train, tokenizer, CONFIG['max_length'], "Train")
410
+ val_ids, val_masks = tokenize_batch(X_val, tokenizer, CONFIG['max_length'], "Val")
411
+ test_ids, test_masks = tokenize_batch(X_test, tokenizer, CONFIG['max_length'], "Test")
412
+
413
+ train_labels = torch.tensor(y_train, dtype=torch.long)
414
+ val_labels = torch.tensor(y_val, dtype=torch.long)
415
+ test_labels = torch.tensor(y_test, dtype=torch.long)
416
+
417
+ del X_train, X_val, X_test, y_train, y_val, y_test, X_temp, y_temp
418
+ gc.collect()
419
+
420
+ print()
421
+
422
+ # ============================================================
423
+ # DATALOADERS
424
+ # ============================================================
425
+
426
+ train_dataset = TensorDataset(train_ids, train_masks, train_labels)
427
+ val_dataset = TensorDataset(val_ids, val_masks, val_labels)
428
+ test_dataset = TensorDataset(test_ids, test_masks, test_labels)
429
+
430
+ train_loader = DataLoader(
431
+ train_dataset, batch_size=CONFIG['batch_size'], shuffle=True,
432
+ num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
433
+ persistent_workers=True, drop_last=True
434
+ )
435
+ val_loader = DataLoader(
436
+ val_dataset, batch_size=CONFIG['batch_size'], shuffle=False,
437
+ num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
438
+ persistent_workers=True
439
+ )
440
+ test_loader = DataLoader(
441
+ test_dataset, batch_size=CONFIG['batch_size'], shuffle=False,
442
+ num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
443
+ persistent_workers=True
444
+ )
445
+
446
+ print(f"Train batches: {len(train_loader):,}")
447
+ print()
448
+
449
+ # ============================================================
450
+ # MODEL (3 classes!)
451
+ # ============================================================
452
+
453
+ print("LOADING MODEL")
454
+ print("-" * 70)
455
+
456
+ model = AutoModelForSequenceClassification.from_pretrained(
457
+ CONFIG['model_name'],
458
+ num_labels=CONFIG['num_classes'], # 3 classes!
459
+ local_files_only=True
460
+ )
461
+ model = model.to(device)
462
+ print(f"Model loaded with {CONFIG['num_classes']} output classes")
463
+ print()
464
+
465
+ # ============================================================
466
+ # LOSS FUNCTION
467
+ # ============================================================
468
+
469
+ if class_weights is not None:
470
+ class_weights = class_weights.to(device)
471
+
472
+ criterion = FocalLoss(
473
+ num_classes=CONFIG['num_classes'],
474
+ gamma=CONFIG['focal_gamma'],
475
+ alpha=class_weights,
476
+ label_smoothing=CONFIG['label_smoothing']
477
+ )
478
+
479
+ print(f"Loss: Focal (γ={CONFIG['focal_gamma']}) + Label Smoothing ({CONFIG['label_smoothing']})")
480
+ print()
481
+
482
+ # ============================================================
483
+ # OPTIMIZER & SCHEDULER
484
+ # ============================================================
485
+
486
+ optimizer = torch.optim.AdamW(
487
+ model.parameters(),
488
+ lr=CONFIG['learning_rate'],
489
+ weight_decay=CONFIG['weight_decay']
490
+ )
491
+
492
+ accum_steps = CONFIG['gradient_accumulation_steps']
493
+ steps_per_epoch = len(train_loader) // accum_steps
494
+ total_steps = steps_per_epoch * CONFIG['epochs']
495
+ warmup_steps = int(total_steps * CONFIG['warmup_ratio'])
496
+
497
+ scheduler = get_scheduler(optimizer, CONFIG['scheduler_type'], total_steps, warmup_steps, CONFIG)
498
+ scaler = torch.amp.GradScaler('cuda') if CONFIG['use_amp'] else None
499
+
500
+ # ============================================================
501
+ # TRAINING LOOP
502
+ # ============================================================
503
+
504
+ print("=" * 70)
505
+ print("TRAINING")
506
+ print("=" * 70)
507
+ print()
508
+
509
+ best_val_acc = 0
510
+ best_val_loss = float('inf')
511
+ patience_counter = 0
512
+ history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'lr': []}
513
+ total_train_time = 0
514
+
515
+ try:
516
+ for epoch in range(CONFIG['epochs']):
517
+ epoch_start = time.time()
518
+
519
+ # === TRAIN ===
520
+ model.train()
521
+ train_loss, train_correct, train_total = 0, 0, 0
522
+ optimizer.zero_grad()
523
+
524
+ pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [Train]", ncols=120)
525
+
526
+ for batch_idx, (input_ids, attention_mask, labels) in enumerate(pbar):
527
+ input_ids = input_ids.to(device, non_blocking=True)
528
+ attention_mask = attention_mask.to(device, non_blocking=True)
529
+ labels = labels.to(device, non_blocking=True)
530
+
531
+ if CONFIG['use_amp']:
532
+ with torch.amp.autocast('cuda'):
533
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
534
+ loss = criterion(outputs.logits, labels) / accum_steps
535
+ scaler.scale(loss).backward()
536
+ else:
537
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
538
+ loss = criterion(outputs.logits, labels) / accum_steps
539
+ loss.backward()
540
+
541
+ if (batch_idx + 1) % accum_steps == 0:
542
+ if CONFIG['use_amp']:
543
+ scaler.unscale_(optimizer)
544
+ torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])
545
+ scaler.step(optimizer)
546
+ scaler.update()
547
+ else:
548
+ torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])
549
+ optimizer.step()
550
+ scheduler.step()
551
+ optimizer.zero_grad()
552
+
553
+ train_loss += loss.item() * accum_steps
554
+ _, pred = outputs.logits.max(1)
555
+ train_total += labels.size(0)
556
+ train_correct += pred.eq(labels).sum().item()
557
+
558
+ pbar.set_postfix({
559
+ 'loss': f'{train_loss/(batch_idx+1):.4f}',
560
+ 'acc': f'{100*train_correct/train_total:.1f}%'
561
+ })
562
+
563
+ train_loss /= len(train_loader)
564
+ train_acc = 100 * train_correct / train_total
565
+
566
+ # === VALIDATION ===
567
+ model.eval()
568
+ val_loss, val_correct, val_total = 0, 0, 0
569
+
570
+ with torch.no_grad():
571
+ for input_ids, attention_mask, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [Val]", ncols=120):
572
+ input_ids = input_ids.to(device, non_blocking=True)
573
+ attention_mask = attention_mask.to(device, non_blocking=True)
574
+ labels = labels.to(device, non_blocking=True)
575
+
576
+ if CONFIG['use_amp']:
577
+ with torch.amp.autocast('cuda'):
578
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
579
+ loss = criterion(outputs.logits, labels)
580
+ else:
581
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
582
+ loss = criterion(outputs.logits, labels)
583
+
584
+ val_loss += loss.item()
585
+ _, pred = outputs.logits.max(1)
586
+ val_total += labels.size(0)
587
+ val_correct += pred.eq(labels).sum().item()
588
+
589
+ val_loss /= len(val_loader)
590
+ val_acc = 100 * val_correct / val_total
591
+
592
+ epoch_time = time.time() - epoch_start
593
+ total_train_time += epoch_time
594
+
595
+ history['train_loss'].append(train_loss)
596
+ history['train_acc'].append(train_acc)
597
+ history['val_loss'].append(val_loss)
598
+ history['val_acc'].append(val_acc)
599
+ history['lr'].append(scheduler.get_last_lr()[0])
600
+
601
+ # === EPOCH SUMMARY ===
602
+ print()
603
+ print(f" Epoch {epoch+1}: Train Loss={train_loss:.4f}, Acc={train_acc:.2f}% | Val Loss={val_loss:.4f}, Acc={val_acc:.2f}% | Time={epoch_time:.0f}s")
604
+
605
+ # Checkpointing
606
+ is_best = val_loss < best_val_loss
607
+ if is_best:
608
+ best_val_loss = val_loss
609
+ patience_counter = 0
610
+ else:
611
+ patience_counter += 1
612
+
613
+ if val_acc > best_val_acc:
614
+ best_val_acc = val_acc
615
+
616
+ if CONFIG['checkpoint_every_epoch']:
617
+ ckpt_path = os.path.join(CONFIG['checkpoint_dir'], f'checkpoint_epoch_{epoch+1}.pt')
618
+ save_checkpoint(model, tokenizer, optimizer, scheduler, scaler,
619
+ epoch+1, val_acc, val_loss, history, CONFIG, ckpt_path, is_best)
620
+ cleanup_old_checkpoints(CONFIG['checkpoint_dir'], CONFIG['save_total_limit'])
621
+
622
+ if is_best:
623
+ print(f" 🏆 New best model saved!")
624
+
625
+ if CONFIG['early_stopping'] and patience_counter >= CONFIG['early_stopping_patience']:
626
+ print(f"\n 🛑 Early stopping after {epoch+1} epochs")
627
+ break
628
+
629
+ print()
630
+
631
+ except Exception as e:
632
+ print(f"\n⚠️ Error: {e}")
633
+ emergency_dir = CONFIG['output_dir'] + '_emergency'
634
+ os.makedirs(emergency_dir, exist_ok=True)
635
+ model.save_pretrained(emergency_dir)
636
+ tokenizer.save_pretrained(emergency_dir)
637
+ raise
638
+
639
+ print("=" * 70)
640
+ print(f"TRAINING COMPLETE - {total_train_time/60:.1f} minutes")
641
+ print("=" * 70)
642
+ print()
643
+
644
+ # ============================================================
645
+ # FINAL TEST EVALUATION
646
+ # ============================================================
647
+
648
+ print("FINAL TEST EVALUATION")
649
+ print("-" * 70)
650
+
651
+ # Load best model (now works without fix!)
652
+ model = AutoModelForSequenceClassification.from_pretrained(
653
+ CONFIG['output_dir'], local_files_only=True
654
+ )
655
+ model = model.to(device)
656
+ model.eval()
657
+
658
+ all_preds, all_labels, all_probs = [], [], []
659
+
660
+ with torch.no_grad():
661
+ for input_ids, attention_mask, labels in tqdm(test_loader, desc="Testing"):
662
+ input_ids = input_ids.to(device)
663
+ attention_mask = attention_mask.to(device)
664
+
665
+ if CONFIG['use_amp']:
666
+ with torch.amp.autocast('cuda'):
667
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
668
+ else:
669
+ outputs = model(input_ids=input_ids, attention_mask=attention_mask)
670
+
671
+ probs = F.softmax(outputs.logits, dim=-1)
672
+ _, preds = outputs.logits.max(1)
673
+
674
+ all_preds.extend(preds.cpu().numpy())
675
+ all_labels.extend(labels.numpy())
676
+ all_probs.extend(probs.cpu().numpy())
677
+
678
+ all_preds = np.array(all_preds)
679
+ all_labels = np.array(all_labels)
680
+ all_probs = np.array(all_probs)
681
+
682
+ test_acc = 100 * (all_preds == all_labels).mean()
683
+
684
+ print()
685
+ print(f"Test Accuracy: {test_acc:.2f}%")
686
+ print()
687
+
688
+ # Classification Report
689
+ print("CLASSIFICATION REPORT")
690
+ print("-" * 70)
691
+ report = classification_report(
692
+ all_labels, all_preds,
693
+ target_names=CONFIG['class_names'],
694
+ digits=3,
695
+ output_dict=True
696
+ )
697
+ print(classification_report(
698
+ all_labels, all_preds,
699
+ target_names=CONFIG['class_names'],
700
+ digits=3
701
+ ))
702
+
703
+ # ============================================================
704
+ # TEACHER-FOCUSED METRICS
705
+ # ============================================================
706
+
707
+ print()
708
+ print("=" * 70)
709
+ print("📊 TEACHER-FOCUSED METRICS")
710
+ print("=" * 70)
711
+ print()
712
+
713
+ # Negative class recall (MOST IMPORTANT for teachers)
714
+ negative_recall = report['Negative']['recall'] * 100
715
+ negative_precision = report['Negative']['precision'] * 100
716
+
717
+ print(f" 🔴 NEGATIVE FEEDBACK DETECTION (Struggling Students):")
718
+ print(f" Recall: {negative_recall:.1f}% ← {negative_recall:.0f}% of struggling students caught")
719
+ print(f" Precision: {negative_precision:.1f}% ← {negative_precision:.0f}% of flags are real issues")
720
+ print()
721
+
722
+ # False negative analysis (missed struggling students)
723
+ false_negatives = ((all_labels == 0) & (all_preds != 0)).sum()
724
+ total_negatives = (all_labels == 0).sum()
725
+ missed_pct = 100 * false_negatives / total_negatives
726
+
727
+ print(f" ⚠️ MISSED STRUGGLING STUDENTS:")
728
+ print(f" {false_negatives:,} of {total_negatives:,} negative cases missed ({missed_pct:.1f}%)")
729
+ print()
730
+
731
+ # Confidence analysis
732
+ pred_confidence = all_probs.max(axis=1)
733
+ low_confidence = (pred_confidence < 0.7).sum()
734
+ low_conf_pct = 100 * low_confidence / len(pred_confidence)
735
+
736
+ print(f" 🤔 UNCERTAIN PREDICTIONS (confidence < 70%):")
737
+ print(f" {low_confidence:,} of {len(pred_confidence):,} predictions ({low_conf_pct:.1f}%)")
738
+ print(f" → These should be flagged for manual review")
739
+ print()
740
+
741
+ # ============================================================
742
+ # PLOTS
743
+ # ============================================================
744
+
745
+ # Confusion Matrix
746
+ fig, axes = plt.subplots(1, 2, figsize=(14, 5))
747
+
748
+ cm = confusion_matrix(all_labels, all_preds)
749
+ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
750
+ xticklabels=CONFIG['class_names'],
751
+ yticklabels=CONFIG['class_names'], ax=axes[0])
752
+ axes[0].set_xlabel('Predicted')
753
+ axes[0].set_ylabel('Actual')
754
+ axes[0].set_title('Confusion Matrix (Counts)')
755
+
756
+ cm_norm = cm.astype(float) / cm.sum(axis=1, keepdims=True)
757
+ sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
758
+ xticklabels=CONFIG['class_names'],
759
+ yticklabels=CONFIG['class_names'], ax=axes[1])
760
+ axes[1].set_xlabel('Predicted')
761
+ axes[1].set_ylabel('Actual')
762
+ axes[1].set_title('Confusion Matrix (Recall)')
763
+
764
+ plt.tight_layout()
765
+ plt.savefig('plots/confusion_matrix_3class.png', dpi=150)
766
+ print("✓ Saved: plots/confusion_matrix_3class.png")
767
+
768
+ # Per-class metrics
769
+ fig, ax = plt.subplots(figsize=(10, 6))
770
+ x = np.arange(3)
771
+ width = 0.25
772
+
773
+ recalls = [report[c]['recall']*100 for c in CONFIG['class_names']]
774
+ precisions = [report[c]['precision']*100 for c in CONFIG['class_names']]
775
+ f1s = [report[c]['f1-score']*100 for c in CONFIG['class_names']]
776
+
777
+ bars1 = ax.bar(x - width, recalls, width, label='Recall', color='#e74c3c')
778
+ bars2 = ax.bar(x, precisions, width, label='Precision', color='#3498db')
779
+ bars3 = ax.bar(x + width, f1s, width, label='F1-Score', color='#2ecc71')
780
+
781
+ ax.set_ylabel('Score (%)')
782
+ ax.set_title('Per-Class Metrics (3-Class Model)')
783
+ ax.set_xticks(x)
784
+ ax.set_xticklabels(['🔴 Negative\n(Needs Attention)', '🟡 Neutral\n(Mixed)', '🟢 Positive\n(Satisfied)'])
785
+ ax.legend()
786
+ ax.set_ylim(0, 105)
787
+ ax.axhline(y=80, color='gray', linestyle='--', alpha=0.5)
788
+
789
+ for bars in [bars1, bars2, bars3]:
790
+ for bar in bars:
791
+ height = bar.get_height()
792
+ ax.annotate(f'{height:.0f}%', xy=(bar.get_x() + bar.get_width()/2, height),
793
+ xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=9)
794
+
795
+ plt.tight_layout()
796
+ plt.savefig('plots/per_class_metrics_3class.png', dpi=150)
797
+ print("✓ Saved: plots/per_class_metrics_3class.png")
798
+
799
+ # Training history
800
+ fig, axes = plt.subplots(1, 2, figsize=(12, 4))
801
+ epochs_range = range(1, len(history['train_loss']) + 1)
802
+
803
+ axes[0].plot(epochs_range, history['train_loss'], 'b-o', label='Train')
804
+ axes[0].plot(epochs_range, history['val_loss'], 'r-o', label='Val')
805
+ axes[0].set_xlabel('Epoch')
806
+ axes[0].set_ylabel('Loss')
807
+ axes[0].set_title('Training Loss')
808
+ axes[0].legend()
809
+ axes[0].grid(True, alpha=0.3)
810
+
811
+ axes[1].plot(epochs_range, history['train_acc'], 'b-o', label='Train')
812
+ axes[1].plot(epochs_range, history['val_acc'], 'r-o', label='Val')
813
+ axes[1].set_xlabel('Epoch')
814
+ axes[1].set_ylabel('Accuracy (%)')
815
+ axes[1].set_title('Training Accuracy')
816
+ axes[1].legend()
817
+ axes[1].grid(True, alpha=0.3)
818
+
819
+ plt.tight_layout()
820
+ plt.savefig('plots/training_history_3class.png', dpi=150)
821
+ print("✓ Saved: plots/training_history_3class.png")
822
+
823
+ # ============================================================
824
+ # SAVE RESULTS
825
+ # ============================================================
826
+
827
+ results = {
828
+ 'test_accuracy': test_acc,
829
+ 'negative_recall': negative_recall,
830
+ 'negative_precision': negative_precision,
831
+ 'missed_struggling_students': int(false_negatives),
832
+ 'total_negative_cases': int(total_negatives),
833
+ 'low_confidence_predictions': int(low_confidence),
834
+ 'config': CONFIG,
835
+ 'classification_report': report,
836
+ 'training_time_minutes': total_train_time / 60,
837
+ }
838
+
839
+ torch.save(results, os.path.join(CONFIG['output_dir'], 'results.pt'))
840
+
841
+ with open(os.path.join(CONFIG['output_dir'], 'results.json'), 'w') as f:
842
+ save_results = {k: v for k, v in results.items() if k not in ['config', 'classification_report']}
843
+ save_results['per_class_recall'] = {c: report[c]['recall'] for c in CONFIG['class_names']}
844
+ json.dump(save_results, f, indent=2)
845
+
846
+ # ============================================================
847
+ # FINAL SUMMARY
848
+ # ============================================================
849
+
850
+ print()
851
+ print("=" * 70)
852
+ print("🎉 TRAINING COMPLETE!")
853
+ print("=" * 70)
854
+ print()
855
+ print(f" Model saved to: {CONFIG['output_dir']}/")
856
+ print()
857
+ print(" RESULTS:")
858
+ print(f" Test Accuracy: {test_acc:.1f}%")
859
+ print(f" Negative Recall: {negative_recall:.1f}% ← Catches {negative_recall:.0f}% of struggling students")
860
+ print(f" Negative Precision: {negative_precision:.1f}%")
861
+ print()
862
+ print(" PER-CLASS RECALL:")
863
+ for name in CONFIG['class_names']:
864
+ recall = report[name]['recall'] * 100
865
+ emoji = '🔴' if name == 'Negative' else ('🟡' if name == 'Neutral' else '🟢')
866
+ print(f" {emoji} {name}: {recall:.1f}%")
867
+ print()
868
+ print("=" * 70)
869
+
870
+
871
# Script entry point: start the training pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
dropout_binaryclass/correlation.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ """
4
+ Feature Correlation Analysis
5
+ Helps identify redundant features and features most correlated with Target.
6
+ """
7
+
8
+ import pandas as pd
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ import seaborn as sns
12
+
13
# =============================================================================
# CONFIGURATION
# =============================================================================

DATA_PATH = 'data.csv'
TARGET_COLUMN = 'Target'

# |corr| between two features at or above which the pair is reported as redundant.
REDUNDANCY_THRESHOLD = 0.7
# |corr| with the target below which a feature is a candidate for removal.
WEAK_THRESHOLD = 0.05
# |corr| with the target at or above which a feature is reported as predictive.
STRONG_THRESHOLD = 0.2

# Column layout of the correlated-pairs table; fixed so that an empty result
# still has the columns the rest of the script (and the CSV) relies on.
PAIR_COLUMNS = ['Feature 1', 'Feature 2', 'Correlation']


# =============================================================================
# 1. LOAD DATA
# =============================================================================

def load_data(path=DATA_PATH):
    """Load the ';'-separated dataset and encode the target column.

    Rows whose target is 'Enrolled' are removed, then 'Dropout' is mapped to 0
    and 'Graduate' to 1.

    Args:
        path: File path or file-like object accepted by ``pd.read_csv``.

    Returns:
        A new DataFrame (independent of the parsed frame) with a numeric target.
    """
    df = pd.read_csv(path, sep=';')
    # .copy() makes the filtered frame independent, so the assignment below
    # does not write into a slice of the original (chained assignment).
    df = df[df[TARGET_COLUMN] != 'Enrolled'].copy()
    df[TARGET_COLUMN] = df[TARGET_COLUMN].map({'Dropout': 0, 'Graduate': 1})
    return df


# =============================================================================
# 2. CORRELATION WITH TARGET
# =============================================================================

def rank_target_correlations(df, target=TARGET_COLUMN):
    """Return each feature's correlation with `target`, strongest |corr| first."""
    return df.corr()[target].drop(target).sort_values(key=abs, ascending=False)


def plot_target_correlations(target_corr):
    """Save (and show) a horizontal bar chart of the feature/target correlations."""
    plt.figure(figsize=(12, 10))
    colors = ['green' if c > 0 else 'red' for c in target_corr.values]
    target_corr.plot(kind='barh', color=colors)
    plt.title('Feature Correlation with Target (Graduate=1)')
    plt.xlabel('Correlation Coefficient')
    plt.axvline(x=0, color='black', linewidth=0.5)
    strong_line = plt.axvline(x=0.3, color='blue', linestyle='--', alpha=0.5,
                              label='Strong threshold')
    plt.axvline(x=-0.3, color='blue', linestyle='--', alpha=0.5)
    # The label above is only rendered if a legend is drawn; restrict the
    # legend to that one line so the bars do not add an entry of their own.
    plt.legend(handles=[strong_line])
    plt.tight_layout()
    plt.savefig('correlation_with_target.png', dpi=150)
    plt.show()


# =============================================================================
# 3. FEATURE-TO-FEATURE CORRELATION (Find Redundant Features)
# =============================================================================

def find_correlated_pairs(corr_matrix, threshold=REDUNDANCY_THRESHOLD):
    """List feature pairs whose absolute correlation is >= `threshold`.

    Only the upper triangle of `corr_matrix` is scanned, so each unordered
    pair appears at most once.

    Args:
        corr_matrix: Square feature-by-feature correlation DataFrame.
        threshold: Minimum absolute correlation for a pair to be reported.

    Returns:
        DataFrame with columns 'Feature 1', 'Feature 2', 'Correlation', sorted
        by descending absolute correlation. It is empty (but still has those
        columns) when no pair qualifies.
    """
    columns = corr_matrix.columns
    pairs = []
    for i in range(len(columns)):
        for j in range(i + 1, len(columns)):
            corr_value = corr_matrix.iloc[i, j]
            if abs(corr_value) >= threshold:
                pairs.append({
                    'Feature 1': columns[i],
                    'Feature 2': columns[j],
                    'Correlation': corr_value,
                })

    if not pairs:
        # A DataFrame built from an empty list has no columns at all, and
        # sorting it by 'Correlation' would raise KeyError.
        return pd.DataFrame(columns=PAIR_COLUMNS)

    return pd.DataFrame(pairs, columns=PAIR_COLUMNS).sort_values(
        'Correlation', key=abs, ascending=False)


def split_pair(feature_1, feature_2, target_corr):
    """Return ``(keep, drop)`` for a correlated pair.

    The feature with the larger absolute target correlation is kept; on a tie
    `feature_1` is kept. A feature missing from `target_corr` counts as 0.
    """
    corr1 = abs(target_corr.get(feature_1, 0))
    corr2 = abs(target_corr.get(feature_2, 0))
    if corr1 >= corr2:
        return feature_1, feature_2
    return feature_2, feature_1


def plot_correlation_matrix(corr_matrix):
    """Save (and show) an annotated heatmap of the feature correlation matrix."""
    plt.figure(figsize=(20, 16))
    sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm',
                center=0, square=True, linewidths=0.5,
                annot_kws={'size': 6})
    plt.title('Feature Correlation Matrix')
    plt.tight_layout()
    plt.savefig('correlation_matrix.png', dpi=150)
    plt.show()


# =============================================================================
# 6. SUGGESTED FEATURES TO DROP
# =============================================================================

def select_features_to_drop(target_corr, high_corr_df, weak_threshold=WEAK_THRESHOLD):
    """Decide which features to drop and why.

    A feature is dropped when its absolute target correlation is below
    `weak_threshold`, or when it is the weaker member of a correlated pair.

    NOTE: every pair is judged independently, so in a chain A~B, B~C both the
    loser of (A, B) and the loser of (B, C) are dropped.

    Args:
        target_corr: Series of feature -> correlation with the target.
        high_corr_df: Pairs table as returned by ``find_correlated_pairs``.
        weak_threshold: Cut-off for the weak-correlation rule.

    Returns:
        Dict mapping each dropped feature to its list of reasons, in the order
        of `target_corr` (deterministic, unlike iterating a set).
    """
    weak = set(target_corr[abs(target_corr) < weak_threshold].index)
    # Only the member actually chosen for dropping is "redundant"; the kept
    # member of a pair must not be labelled as such.
    redundant = {
        split_pair(f1, f2, target_corr)[1]
        for f1, f2 in zip(high_corr_df['Feature 1'], high_corr_df['Feature 2'])
    }

    reasons = {}
    for feature in target_corr.index:
        why = []
        if feature in weak:
            why.append(f"weak target corr ({target_corr[feature]:+.4f})")
        if feature in redundant:
            why.append("redundant with another feature")
        if why:
            reasons[feature] = why
    return reasons


def main():
    """Run the full correlation analysis: print the report, save plots and files."""
    df = load_data()

    print(f"Dataset shape: {df.shape}")
    print(f"Features: {df.shape[1] - 1}")

    # -------------------------------------------------------------------------
    # 2. Correlation with target
    # -------------------------------------------------------------------------
    print("\n" + "="*70)
    print("CORRELATION WITH TARGET (Dropout=0, Graduate=1)")
    print("="*70)

    target_corr = rank_target_correlations(df)

    print("\nAll features ranked by absolute correlation with Target:\n")
    for i, (feature, corr) in enumerate(target_corr.items(), 1):
        strength = "STRONG" if abs(corr) > 0.3 else "MODERATE" if abs(corr) > 0.15 else "WEAK"
        print(f"{i:2d}. {feature:50s} {corr:+.4f} [{strength}]")

    plot_target_correlations(target_corr)

    # -------------------------------------------------------------------------
    # 3. Feature-to-feature correlation
    # -------------------------------------------------------------------------
    print("\n" + "="*70)
    print("HIGHLY CORRELATED FEATURE PAIRS (Potential Redundancy)")
    print("="*70)

    corr_matrix = df.drop(TARGET_COLUMN, axis=1).corr()
    threshold = REDUNDANCY_THRESHOLD
    high_corr_df = find_correlated_pairs(corr_matrix, threshold)

    print(f"\nFeature pairs with correlation >= {threshold}:\n")
    if len(high_corr_df) > 0:
        for _, row in high_corr_df.iterrows():
            print(f" {row['Correlation']:+.4f} | {row['Feature 1']}")
            print(f" | {row['Feature 2']}")
            print()
    else:
        print(" No highly correlated pairs found.")

    # -------------------------------------------------------------------------
    # 4. Correlation heatmap
    # -------------------------------------------------------------------------
    plot_correlation_matrix(corr_matrix)

    # -------------------------------------------------------------------------
    # 5. Recommendations for feature selection
    # -------------------------------------------------------------------------
    print("\n" + "="*70)
    print("FEATURE SELECTION RECOMMENDATIONS")
    print("="*70)

    # Weak correlation with target (candidates for removal)
    weak_threshold = WEAK_THRESHOLD
    weak_features = target_corr[abs(target_corr) < weak_threshold]

    print(f"\n1. WEAK CORRELATION WITH TARGET (|corr| < {weak_threshold}):")
    print(" Consider removing these - they may not help prediction:\n")
    for feature, corr in weak_features.items():
        print(f" - {feature}: {corr:+.4f}")

    # Features to keep (strong correlation)
    strong_threshold = STRONG_THRESHOLD
    strong_features = target_corr[abs(target_corr) >= strong_threshold]

    print(f"\n2. STRONG CORRELATION WITH TARGET (|corr| >= {strong_threshold}):")
    print(" Keep these - they are predictive:\n")
    for feature, corr in strong_features.items():
        print(f" + {feature}: {corr:+.4f}")

    # Redundant features (high correlation with each other)
    print(f"\n3. REDUNDANT FEATURES (correlated with each other >= {threshold}):")
    print(" Consider keeping only one from each pair:\n")
    for _, row in high_corr_df.iterrows():
        # Suggest keeping the one more correlated with target
        keep, drop = split_pair(row['Feature 1'], row['Feature 2'], target_corr)
        print(f" KEEP: {keep} (target corr: {target_corr.get(keep, 0):+.4f})")
        print(f" DROP: {drop} (target corr: {target_corr.get(drop, 0):+.4f})")
        print()

    # -------------------------------------------------------------------------
    # 6. Suggested features to drop
    # -------------------------------------------------------------------------
    print("\n" + "="*70)
    print("SUGGESTED FEATURES TO DROP")
    print("="*70)

    drop_reasons = select_features_to_drop(target_corr, high_corr_df, weak_threshold)
    features_to_drop = list(drop_reasons)

    print(f"\nBased on analysis, consider dropping these {len(features_to_drop)} features:\n")
    for name, reason in drop_reasons.items():
        print(f" - {name}")
        print(f" Reason: {', '.join(reason)}")

    # Features to keep
    features_to_keep = [name for name in target_corr.index if name not in drop_reasons]

    print(f"\nKeep these {len(features_to_keep)} features:\n")
    for name in features_to_keep:
        print(f" + {name} (target corr: {target_corr[name]:+.4f})")

    # -------------------------------------------------------------------------
    # 7. Generate code snippet
    # -------------------------------------------------------------------------
    print("\n" + "="*70)
    print("CODE SNIPPET FOR YOUR TRAINING SCRIPT")
    print("="*70)

    print("\n# Copy this to your training script:")
    print(f"columns_to_drop = {list(features_to_drop)}")

    # -------------------------------------------------------------------------
    # 8. Save analysis results
    # -------------------------------------------------------------------------
    # The two plots were already written by the plotting helpers above.
    saved_files = ['correlation_with_target.png', 'correlation_matrix.png']

    # Save correlation with target
    target_corr.to_csv('target_correlations.csv', header=['correlation'])
    saved_files.append('target_correlations.csv')

    # Save high correlation pairs (only when there is something to save)
    if len(high_corr_df) > 0:
        high_corr_df.to_csv('redundant_feature_pairs.csv', index=False)
        saved_files.append('redundant_feature_pairs.csv')

    # Save recommendations
    with open('feature_selection_recommendations.txt', 'w', encoding='utf-8') as out:
        out.write("FEATURE SELECTION RECOMMENDATIONS\n")
        out.write("="*50 + "\n\n")
        out.write(f"Features to DROP ({len(features_to_drop)}):\n")
        for feat in features_to_drop:
            out.write(f" - {feat}\n")
        out.write(f"\nFeatures to KEEP ({len(features_to_keep)}):\n")
        for feat in features_to_keep:
            out.write(f" + {feat}\n")
    saved_files.append('feature_selection_recommendations.txt')

    # Report only the files that were actually written in this run.
    print("\nFiles saved:")
    for i, name in enumerate(saved_files, 1):
        print(f" {i}. {name}")


if __name__ == '__main__':
    main()
dropout_binaryclass/data.csv ADDED
The diff for this file is too large to render. See raw diff
 
dropout_binaryclass/feature_importance.png ADDED
dropout_binaryclass/feature_selection_recommendations.txt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FEATURE SELECTION RECOMMENDATIONS
2
+ ==================================================
3
+
4
+ Features to DROP (17):
5
+ - Curricular units 2nd sem (enrolled)
6
+ - Curricular units 1st sem (credited)
7
+ - Nacionality
8
+ - Mother's occupation
9
+ - Curricular units 1st sem (approved)
10
+ - Educational special needs
11
+ - Inflation rate
12
+ - International
13
+ - Curricular units 2nd sem (credited)
14
+ - Curricular units 2nd sem (grade)
15
+ - Course
16
+ - Curricular units 1st sem (enrolled)
17
+ - Father's occupation
18
+ - Curricular units 1st sem (evaluations)
19
+ - Curricular units 1st sem (grade)
20
+ - Father's qualification
21
+ - Unemployment rate
22
+
23
+ Features to KEEP (19):
24
+ + Curricular units 2nd sem (approved)
25
+ + Tuition fees up to date
26
+ + Scholarship holder
27
+ + Age at enrollment
28
+ + Debtor
29
+ + Gender
30
+ + Application mode
31
+ + Admission grade
32
+ + Displaced
33
+ + Curricular units 2nd sem (evaluations)
34
+ + Previous qualification (grade)
35
+ + Curricular units 2nd sem (without evaluations)
36
+ + Marital status
37
+ + Application order
38
+ + Daytime/evening attendance
39
+ + Curricular units 1st sem (without evaluations)
40
+ + Previous qualification
41
+ + Mother's qualification
42
+ + GDP
dropout_binaryclass/model_config.json ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "Student Dropout Prediction Model",
3
+ "model_type": "LogisticRegression with StandardScaler",
4
+ "target_mapping": {
5
+ "0": "Dropout",
6
+ "1": "Graduate"
7
+ },
8
+ "features": [
9
+ "Marital status",
10
+ "Application mode",
11
+ "Application order",
12
+ "Course",
13
+ "Daytime/evening attendance\t",
14
+ "Previous qualification",
15
+ "Previous qualification (grade)",
16
+ "Nacionality",
17
+ "Mother's qualification",
18
+ "Father's qualification",
19
+ "Mother's occupation",
20
+ "Admission grade",
21
+ "Displaced",
22
+ "Educational special needs",
23
+ "Debtor",
24
+ "Tuition fees up to date",
25
+ "Gender",
26
+ "Scholarship holder",
27
+ "Age at enrollment",
28
+ "International",
29
+ "Curricular units 1st sem (credited)",
30
+ "Curricular units 1st sem (enrolled)",
31
+ "Curricular units 1st sem (evaluations)",
32
+ "Curricular units 1st sem (approved)",
33
+ "Curricular units 1st sem (grade)",
34
+ "Curricular units 1st sem (without evaluations)",
35
+ "Curricular units 2nd sem (evaluations)",
36
+ "Curricular units 2nd sem (grade)",
37
+ "Curricular units 2nd sem (without evaluations)",
38
+ "Unemployment rate",
39
+ "Inflation rate",
40
+ "GDP"
41
+ ],
42
+ "num_features": 32,
43
+ "dropped_columns": [
44
+ "Father's occupation",
45
+ "Curricular units 2nd sem (credited)",
46
+ "Curricular units 2nd sem (enrolled)",
47
+ "Curricular units 2nd sem (approved)"
48
+ ],
49
+ "feature_details": {
50
+ "Marital status": {
51
+ "dtype": "int64",
52
+ "min": 1.0,
53
+ "max": 6.0,
54
+ "mean": 1.184297520661157,
55
+ "example_value": 1
56
+ },
57
+ "Application mode": {
58
+ "dtype": "int64",
59
+ "min": 1.0,
60
+ "max": 57.0,
61
+ "mean": 18.421763085399448,
62
+ "example_value": 17
63
+ },
64
+ "Application order": {
65
+ "dtype": "int64",
66
+ "min": 0.0,
67
+ "max": 6.0,
68
+ "mean": 1.750137741046832,
69
+ "example_value": 5
70
+ },
71
+ "Course": {
72
+ "dtype": "int64",
73
+ "min": 33.0,
74
+ "max": 9991.0,
75
+ "mean": 8853.980991735538,
76
+ "example_value": 171
77
+ },
78
+ "Daytime/evening attendance\t": {
79
+ "dtype": "int64",
80
+ "min": 0.0,
81
+ "max": 1.0,
82
+ "mean": 0.8876033057851239,
83
+ "example_value": 1
84
+ },
85
+ "Previous qualification": {
86
+ "dtype": "int64",
87
+ "min": 1.0,
88
+ "max": 43.0,
89
+ "mean": 4.532231404958678,
90
+ "example_value": 1
91
+ },
92
+ "Previous qualification (grade)": {
93
+ "dtype": "int64",
94
+ "min": 95.0,
95
+ "max": 190.0,
96
+ "mean": 132.90881542699725,
97
+ "example_value": 122
98
+ },
99
+ "Nacionality": {
100
+ "dtype": "int64",
101
+ "min": 1.0,
102
+ "max": 109.0,
103
+ "mean": 1.828099173553719,
104
+ "example_value": 1
105
+ },
106
+ "Mother's qualification": {
107
+ "dtype": "int64",
108
+ "min": 1.0,
109
+ "max": 44.0,
110
+ "mean": 19.986225895316803,
111
+ "example_value": 19
112
+ },
113
+ "Father's qualification": {
114
+ "dtype": "int64",
115
+ "min": 1.0,
116
+ "max": 44.0,
117
+ "mean": 22.57162534435262,
118
+ "example_value": 12
119
+ },
120
+ "Mother's occupation": {
121
+ "dtype": "int64",
122
+ "min": 0.0,
123
+ "max": 194.0,
124
+ "mean": 10.138567493112948,
125
+ "example_value": 5
126
+ },
127
+ "Admission grade": {
128
+ "dtype": "int64",
129
+ "min": 95.0,
130
+ "max": 190.0,
131
+ "mean": 127.28870523415978,
132
+ "example_value": 127
133
+ },
134
+ "Displaced": {
135
+ "dtype": "int64",
136
+ "min": 0.0,
137
+ "max": 1.0,
138
+ "mean": 0.5490358126721763,
139
+ "example_value": 1
140
+ },
141
+ "Educational special needs": {
142
+ "dtype": "int64",
143
+ "min": 0.0,
144
+ "max": 1.0,
145
+ "mean": 0.011019283746556474,
146
+ "example_value": 0
147
+ },
148
+ "Debtor": {
149
+ "dtype": "int64",
150
+ "min": 0.0,
151
+ "max": 1.0,
152
+ "mean": 0.1137741046831956,
153
+ "example_value": 0
154
+ },
155
+ "Tuition fees up to date": {
156
+ "dtype": "int64",
157
+ "min": 0.0,
158
+ "max": 1.0,
159
+ "mean": 0.8661157024793389,
160
+ "example_value": 1
161
+ },
162
+ "Gender": {
163
+ "dtype": "int64",
164
+ "min": 0.0,
165
+ "max": 1.0,
166
+ "mean": 0.3440771349862259,
167
+ "example_value": 1
168
+ },
169
+ "Scholarship holder": {
170
+ "dtype": "int64",
171
+ "min": 0.0,
172
+ "max": 1.0,
173
+ "mean": 0.26694214876033057,
174
+ "example_value": 0
175
+ },
176
+ "Age at enrollment": {
177
+ "dtype": "int64",
178
+ "min": 17.0,
179
+ "max": 70.0,
180
+ "mean": 23.461157024793387,
181
+ "example_value": 20
182
+ },
183
+ "International": {
184
+ "dtype": "int64",
185
+ "min": 0.0,
186
+ "max": 1.0,
187
+ "mean": 0.023691460055096418,
188
+ "example_value": 0
189
+ },
190
+ "Curricular units 1st sem (credited)": {
191
+ "dtype": "int64",
192
+ "min": 0.0,
193
+ "max": 20.0,
194
+ "mean": 0.7542699724517906,
195
+ "example_value": 0
196
+ },
197
+ "Curricular units 1st sem (enrolled)": {
198
+ "dtype": "int64",
199
+ "min": 0.0,
200
+ "max": 26.0,
201
+ "mean": 6.337465564738292,
202
+ "example_value": 0
203
+ },
204
+ "Curricular units 1st sem (evaluations)": {
205
+ "dtype": "int64",
206
+ "min": 0.0,
207
+ "max": 45.0,
208
+ "mean": 8.071074380165289,
209
+ "example_value": 0
210
+ },
211
+ "Curricular units 1st sem (approved)": {
212
+ "dtype": "int64",
213
+ "min": 0.0,
214
+ "max": 26.0,
215
+ "mean": 4.791460055096419,
216
+ "example_value": 0
217
+ },
218
+ "Curricular units 1st sem (grade)": {
219
+ "dtype": "int64",
220
+ "min": 0.0,
221
+ "max": 19.0,
222
+ "mean": 10.539118457300276,
223
+ "example_value": 0
224
+ },
225
+ "Curricular units 1st sem (without evaluations)": {
226
+ "dtype": "int64",
227
+ "min": 0.0,
228
+ "max": 12.0,
229
+ "mean": 0.12892561983471074,
230
+ "example_value": 0
231
+ },
232
+ "Curricular units 2nd sem (evaluations)": {
233
+ "dtype": "int64",
234
+ "min": 0.0,
235
+ "max": 33.0,
236
+ "mean": 7.763085399449036,
237
+ "example_value": 0
238
+ },
239
+ "Curricular units 2nd sem (grade)": {
240
+ "dtype": "int64",
241
+ "min": 0.0,
242
+ "max": 19.0,
243
+ "mean": 10.038842975206611,
244
+ "example_value": 0
245
+ },
246
+ "Curricular units 2nd sem (without evaluations)": {
247
+ "dtype": "int64",
248
+ "min": 0.0,
249
+ "max": 12.0,
250
+ "mean": 0.14214876033057852,
251
+ "example_value": 0
252
+ },
253
+ "Unemployment rate": {
254
+ "dtype": "int64",
255
+ "min": 8.0,
256
+ "max": 16.0,
257
+ "mean": 11.682920110192837,
258
+ "example_value": 11
259
+ },
260
+ "Inflation rate": {
261
+ "dtype": "int64",
262
+ "min": -1.0,
263
+ "max": 4.0,
264
+ "mean": 1.215702479338843,
265
+ "example_value": 1
266
+ },
267
+ "GDP": {
268
+ "dtype": "int64",
269
+ "min": -4.0,
270
+ "max": 4.0,
271
+ "mean": 0.0418732782369146,
272
+ "example_value": 2
273
+ }
274
+ },
275
+ "model_performance": {
276
+ "avg_roc_auc": 0.9426,
277
+ "std_roc_auc": 0.0022,
278
+ "avg_accuracy": 0.8904,
279
+ "std_accuracy": 0.0123
280
+ },
281
+ "feature_importance": [
282
+ {
283
+ "feature": "Curricular units 1st sem (approved)",
284
+ "coefficient": 3.3163108538242474
285
+ },
286
+ {
287
+ "feature": "Curricular units 2nd sem (grade)",
288
+ "coefficient": 1.5439405534216617
289
+ },
290
+ {
291
+ "feature": "Curricular units 1st sem (enrolled)",
292
+ "coefficient": -1.1411938218498847
293
+ },
294
+ {
295
+ "feature": "Tuition fees up to date",
296
+ "coefficient": 0.9630826567928356
297
+ },
298
+ {
299
+ "feature": "Curricular units 1st sem (credited)",
300
+ "coefficient": -0.8539015768167176
301
+ },
302
+ {
303
+ "feature": "Curricular units 2nd sem (evaluations)",
304
+ "coefficient": -0.6369395746417482
305
+ },
306
+ {
307
+ "feature": "Course",
308
+ "coefficient": -0.6055334597267776
309
+ },
310
+ {
311
+ "feature": "International",
312
+ "coefficient": 0.4993629811863151
313
+ },
314
+ {
315
+ "feature": "Curricular units 1st sem (grade)",
316
+ "coefficient": -0.4580579977450427
317
+ },
318
+ {
319
+ "feature": "Debtor",
320
+ "coefficient": -0.3870319293027283
321
+ },
322
+ {
323
+ "feature": "Nacionality",
324
+ "coefficient": -0.36386269065696214
325
+ },
326
+ {
327
+ "feature": "Scholarship holder",
328
+ "coefficient": 0.3601197899922311
329
+ },
330
+ {
331
+ "feature": "Age at enrollment",
332
+ "coefficient": -0.29681419535938647
333
+ },
334
+ {
335
+ "feature": "Gender",
336
+ "coefficient": -0.22961088968596147
337
+ },
338
+ {
339
+ "feature": "Mother's occupation",
340
+ "coefficient": 0.20867097544620444
341
+ },
342
+ {
343
+ "feature": "Displaced",
344
+ "coefficient": -0.19965059186513248
345
+ },
346
+ {
347
+ "feature": "Curricular units 1st sem (without evaluations)",
348
+ "coefficient": 0.1878768453143166
349
+ },
350
+ {
351
+ "feature": "Previous qualification",
352
+ "coefficient": 0.1635268539723628
353
+ },
354
+ {
355
+ "feature": "Application mode",
356
+ "coefficient": -0.13952867123465623
357
+ },
358
+ {
359
+ "feature": "Curricular units 1st sem (evaluations)",
360
+ "coefficient": 0.13005849075063863
361
+ },
362
+ {
363
+ "feature": "Unemployment rate",
364
+ "coefficient": -0.12395327972323616
365
+ },
366
+ {
367
+ "feature": "Curricular units 2nd sem (without evaluations)",
368
+ "coefficient": 0.11533489424236375
369
+ },
370
+ {
371
+ "feature": "Father's qualification",
372
+ "coefficient": 0.10277051413826378
373
+ },
374
+ {
375
+ "feature": "GDP",
376
+ "coefficient": -0.09145115697113011
377
+ },
378
+ {
379
+ "feature": "Daytime/evening attendance\t",
380
+ "coefficient": -0.08582769046990661
381
+ },
382
+ {
383
+ "feature": "Marital status",
384
+ "coefficient": 0.07586210175822407
385
+ },
386
+ {
387
+ "feature": "Previous qualification (grade)",
388
+ "coefficient": -0.07382604570456465
389
+ },
390
+ {
391
+ "feature": "Admission grade",
392
+ "coefficient": 0.06636622661157908
393
+ },
394
+ {
395
+ "feature": "Mother's qualification",
396
+ "coefficient": -0.05960602912137761
397
+ },
398
+ {
399
+ "feature": "Application order",
400
+ "coefficient": -0.02756430990311611
401
+ },
402
+ {
403
+ "feature": "Inflation rate",
404
+ "coefficient": 0.0016776856356872146
405
+ },
406
+ {
407
+ "feature": "Educational special needs",
408
+ "coefficient": -0.0004318043811183271
409
+ }
410
+ ]
411
+ }
dropout_binaryclass/predict_students_dropout_and_academic_success_model.pkl ADDED
File without changes
dropout_binaryclass/redundant_feature_pairs.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Feature 1,Feature 2,Correlation
2
+ Curricular units 1st sem (credited),Curricular units 2nd sem (credited),0.9470934915899273
3
+ Curricular units 1st sem (enrolled),Curricular units 2nd sem (enrolled),0.9412864966294326
4
+ Curricular units 1st sem (approved),Curricular units 2nd sem (approved),0.9163339784914017
5
+ Mother's occupation,Father's occupation,0.8865682817307416
6
+ Curricular units 1st sem (grade),Curricular units 2nd sem (grade),0.8458637025340845
7
+ Nacionality,International,0.7973873767851265
8
+ Curricular units 1st sem (evaluations),Curricular units 2nd sem (evaluations),0.7906158307754103
9
+ Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),0.7868376275910449
10
+ Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),0.7828630989223708
11
+ Curricular units 1st sem (enrolled),Curricular units 1st sem (approved),0.7735791213004372
12
+ Curricular units 1st sem (enrolled),Curricular units 2nd sem (credited),0.7632761218093532
13
+ Curricular units 1st sem (approved),Curricular units 2nd sem (enrolled),0.7373747998128278
14
+ Curricular units 1st sem (approved),Curricular units 1st sem (grade),0.7101565018864167
15
+ Curricular units 1st sem (approved),Curricular units 2nd sem (grade),0.7093678199762506
16
+ Curricular units 2nd sem (enrolled),Curricular units 2nd sem (approved),0.7044445310875675
dropout_binaryclass/target_correlations.csv ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,correlation
2
+ Curricular units 2nd sem (approved),0.6539952460991423
3
+ Curricular units 2nd sem (grade),0.6053501259229878
4
+ Curricular units 1st sem (approved),0.554880856533347
5
+ Curricular units 1st sem (grade),0.5199270935327744
6
+ Tuition fees up to date,0.4421375757680648
7
+ Scholarship holder,0.313017662589069
8
+ Age at enrollment,-0.2672293831633241
9
+ Debtor,-0.26720719892947853
10
+ Gender,-0.2519548119534265
11
+ Application mode,-0.24450719808426288
12
+ Curricular units 2nd sem (enrolled),0.18289654087432544
13
+ Curricular units 1st sem (enrolled),0.1610735163889365
14
+ Admission grade,0.128057716154513
15
+ Displaced,0.12611303526795542
16
+ Curricular units 2nd sem (evaluations),0.11923876678096997
17
+ Previous qualification (grade),0.10946365310011318
18
+ Curricular units 2nd sem (without evaluations),-0.1026868285766343
19
+ Marital status,-0.10047906625607986
20
+ Application order,0.09435462724757428
21
+ Daytime/evening attendance ,0.08449593574263146
22
+ Curricular units 1st sem (without evaluations),-0.07464226018538014
23
+ Previous qualification,-0.06232290259631596
24
+ Curricular units 1st sem (evaluations),0.05978625949022733
25
+ Mother's qualification,-0.053988794962507865
26
+ Curricular units 2nd sem (credited),0.052401971159116184
27
+ GDP,0.05026014681835994
28
+ Curricular units 1st sem (credited),0.04690001650294807
29
+ Course,0.038135402995266764
30
+ Inflation rate,-0.030325865974636136
31
+ Nacionality,-0.015516308396310501
32
+ Educational special needs,-0.007253654142177353
33
+ International,0.006181262165854279
34
+ Father's qualification,-0.005865479932260606
35
+ Father's occupation,0.005065525427310094
36
+ Unemployment rate,0.0041981052265261075
37
+ Mother's occupation,0.0007724443649592459
dropout_binaryclass/train.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
dropout_binaryclass/train.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ """
4
+ Student Dropout Prediction Model
5
+ Trains a Logistic Regression model and saves it with feature configuration.
6
+ """
7
+
8
+ import pandas as pd
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ import seaborn as sns
12
+ import json
13
+ import joblib
14
+
15
+ from sklearn.model_selection import StratifiedKFold
16
+ from sklearn.linear_model import LogisticRegression
17
+ from sklearn.pipeline import Pipeline
18
+ from sklearn.preprocessing import StandardScaler
19
+ from sklearn.metrics import roc_auc_score, classification_report, accuracy_score
20
+
21
# =============================================================================
# 1. LOAD AND PREPROCESS DATA
# =============================================================================

# Load data (the CSV is semicolon-delimited)
df = pd.read_csv('data.csv', sep=';')
print(f"Original dataset shape: {df.shape}")

# Filter out 'Enrolled' - keep only Dropout and Graduate.
# .copy() detaches the filtered rows from the original frame so the column
# assignments and in-place drop below operate on an independent DataFrame
# instead of a slice (avoids SettingWithCopyWarning).
df = df[df['Target'] != 'Enrolled'].copy()
print(f"After filtering 'Enrolled': {df.shape}")

# Round numeric columns.
# NOTE(review): this rounds every numeric column to whole numbers, so
# fractional grades and macro-economic rates lose their decimals - confirm
# this loss of precision is intended.
df = df.round()

# Convert specific columns to int64 (they are whole-valued after rounding)
numeric_cols = [
    'Admission grade',
    'Previous qualification (grade)',
    'Curricular units 1st sem (grade)',
    'Curricular units 2nd sem (grade)',
    'Unemployment rate',
    'Inflation rate',
    'GDP'
]
df[numeric_cols] = df[numeric_cols].astype(np.int64)

# Drop unnecessary columns (selected by your classmate)
columns_to_drop = [
    "Father's occupation",
    "Curricular units 2nd sem (credited)",
    "Curricular units 2nd sem (enrolled)",
    "Curricular units 2nd sem (approved)"
]
df.drop(columns=columns_to_drop, inplace=True)

# Transform Target column.
# Fail fast with a readable message if the data contains a label outside the
# mapping; otherwise the NaN produced by .map() would only surface later as an
# opaque integer-cast error.
target_mapping = {'Dropout': 0, 'Graduate': 1}
mapped_target = df['Target'].map(target_mapping)
if mapped_target.isna().any():
    unexpected_labels = df.loc[mapped_target.isna(), 'Target'].unique().tolist()
    raise ValueError(f"Unexpected Target labels: {unexpected_labels}")
df['Target'] = mapped_target

# Verify target transformation
print(f"\nTarget distribution:")
print(df['Target'].value_counts())

# Create features and target
x = df.drop('Target', axis=1)
y = df['Target'].astype(int)

print(f"\nFeatures shape: {x.shape}")
print(f"Target shape: {y.shape}")

# =============================================================================
# 2. DEFINE MODEL
# =============================================================================

# Scaling lives inside the pipeline so each CV fold fits the scaler on its own
# training split only.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression(
        penalty='l2',
        C=1.0,
        solver='lbfgs',
        class_weight='balanced',
        random_state=42,
        max_iter=1000
    ))
])

# =============================================================================
# 3. CROSS-VALIDATION
# =============================================================================

print("\n" + "="*60)
print("CROSS-VALIDATION RESULTS")
print("="*60)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
auc_roc_scores = []
acc_scores = []

for fold, (train_index, val_index) in enumerate(skf.split(x, y), 1):
    x_train, x_val = x.iloc[train_index], x.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]

    # Pipeline.fit refits every step, so reusing the same object per fold is safe.
    model.fit(x_train, y_train)

    y_pred = model.predict(x_val)
    # Column 1 is the probability of class 1 ("Graduate").
    y_pred_proba = model.predict_proba(x_val)[:, 1]

    auc_roc = roc_auc_score(y_val, y_pred_proba)
    acc = accuracy_score(y_val, y_pred)

    auc_roc_scores.append(auc_roc)
    acc_scores.append(acc)

    print(f"\nFold {fold}:")
    print(f" Accuracy: {acc:.4f}, ROC-AUC: {auc_roc:.4f}")

print("\n" + "-"*60)
print(f"Average ROC-AUC: {np.mean(auc_roc_scores):.4f} ± {np.std(auc_roc_scores):.4f}")
print(f"Average Accuracy: {np.mean(acc_scores):.4f} ± {np.std(acc_scores):.4f}")

# =============================================================================
# 4. TRAIN FINAL MODEL ON ALL DATA
# =============================================================================

print("\n" + "="*60)
print("TRAINING FINAL MODEL ON ALL DATA")
print("="*60)

# fit() returns the pipeline itself, so final_model and model are the same object.
final_model = model.fit(x, y)
print("Final model trained successfully!")

# =============================================================================
# 5. FEATURE IMPORTANCE
# =============================================================================

# Coefficients are on standardized features; rank them by absolute magnitude.
classifier = final_model.named_steps['clf']
feature_importance = pd.DataFrame({
    'feature': x.columns,
    'coefficient': classifier.coef_[0]
}).sort_values('coefficient', key=abs, ascending=False)

print("\nTop 10 Most Important Features:")
print(feature_importance.head(10).to_string(index=False))

# Plot feature importance
plt.figure(figsize=(10, 6))
sns.barplot(data=feature_importance.head(10), x='coefficient', y='feature')
plt.title('Top 10 Feature Importance (Logistic Regression Coefficients)')
plt.tight_layout()
plt.savefig('feature_importance.png', dpi=150)
plt.show()

# =============================================================================
# 6. SAVE MODEL AND CONFIGURATION
# =============================================================================

print("\n" + "="*60)
print("SAVING MODEL AND CONFIGURATION")
print("="*60)

# Save model using joblib (better for sklearn models)
model_path = "student_dropout_model.pkl"
joblib.dump(final_model, model_path)
print(f"Model saved to: {model_path}")

# Create and save configuration.
# NumPy scalars are converted to plain floats so the JSON encoder never has to
# deal with NumPy types.
config = {
    "model_name": "Student Dropout Prediction Model",
    "model_type": "LogisticRegression with StandardScaler",
    "target_mapping": {
        "0": "Dropout",
        "1": "Graduate"
    },
    "features": x.columns.tolist(),
    "num_features": len(x.columns),
    "dropped_columns": columns_to_drop,
    "feature_details": {},
    "model_performance": {
        "avg_roc_auc": round(float(np.mean(auc_roc_scores)), 4),
        "std_roc_auc": round(float(np.std(auc_roc_scores)), 4),
        "avg_accuracy": round(float(np.mean(acc_scores)), 4),
        "std_accuracy": round(float(np.std(acc_scores)), 4)
    },
    "feature_importance": feature_importance.to_dict('records')
}

# Add feature details (dtype, min, max, etc.)
for col in x.columns:
    column = x[col]
    first_value = column.iloc[0]
    # is_integer_dtype covers every integer width, not only int64/int32.
    is_integer_column = pd.api.types.is_integer_dtype(column.dtype)
    config["feature_details"][col] = {
        "dtype": str(column.dtype),
        "min": float(column.min()),
        "max": float(column.max()),
        "mean": float(column.mean()),
        "example_value": int(first_value) if is_integer_column else float(first_value)
    }

# Save configuration
config_path = "model_config.json"
with open(config_path, 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)
print(f"Configuration saved to: {config_path}")

# =============================================================================
# 7. PRINT SUMMARY
# =============================================================================

print("\n" + "="*60)
print("SUMMARY: FEATURES YOUR CLASSMATE SELECTED")
print("="*60)
print(f"\nTotal features: {len(x.columns)}")
print("\nFeature list:")
for i, col in enumerate(x.columns, 1):
    print(f" {i:2d}. {col}")

print(f"\nDropped columns:")
for col in columns_to_drop:
    print(f" - {col}")

print("\n" + "="*60)
print("DONE! Files created:")
print(f" 1. {model_path} (trained model)")
print(f" 2. {config_path} (feature configuration)")
print(f" 3. feature_importance.png (visualization)")
print("="*60)
grade_multiclass/02_grade_distribution.png ADDED
grade_multiclass/03_performance_index_distribution.png ADDED
grade_multiclass/04_features_by_grade.png ADDED
grade_multiclass/05_extracurricular_analysis.png ADDED
grade_multiclass/06_correlation_heatmap.png ADDED
grade_multiclass/09_feature_importance.png ADDED
grade_multiclass/10_learning_curves.png ADDED
grade_multiclass/11_model_comparison.png ADDED
grade_multiclass/Student_Performance.csv ADDED
The diff for this file is too large to render. See raw diff
 
grade_multiclass/correlation_heatmap.png ADDED
grade_multiclass/feature_importance.png ADDED
grade_multiclass/features_by_grade.png ADDED
grade_multiclass/learning_curves.png ADDED
grade_multiclass/model_comparison.png ADDED
grade_multiclass/student_performance_classification.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
grade_multiclass/student_performance_classification.py ADDED
@@ -0,0 +1,1100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ """
5
+ Student Performance Multi-Class Classification
6
+ ==============================================
7
+ Predicting student grades from study habits, historical performance,
8
+ and lifestyle factors.
9
+
10
+ Dataset: 10,000 student records with 5 features
11
+ Target: Performance Index → Converted to letter grades (A/B/C/D/F)
12
+ """
13
+
14
+ # =============================================================================
15
+ # 1. IMPORTS AND CONFIGURATION
16
+ # =============================================================================
17
+
18
+ import pandas as pd
19
+ import numpy as np
20
+ import matplotlib.pyplot as plt
21
+ import seaborn as sns
22
+ import joblib
23
+ import warnings
24
+ from pathlib import Path
25
+
26
+ from sklearn.model_selection import (
27
+ train_test_split,
28
+ cross_val_score,
29
+ StratifiedKFold,
30
+ GridSearchCV,
31
+ learning_curve
32
+ )
33
+ from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
34
+ from sklearn.compose import ColumnTransformer
35
+ from sklearn.pipeline import Pipeline
36
+ from sklearn.linear_model import LogisticRegression
37
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
38
+ from sklearn.metrics import (
39
+ classification_report,
40
+ confusion_matrix,
41
+ ConfusionMatrixDisplay,
42
+ accuracy_score,
43
+ f1_score
44
+ )
45
+ from sklearn.utils.class_weight import compute_class_weight
46
+
47
# Global configuration: plotting defaults and reproducibility constants.
# NOTE: every warning category is silenced for the whole script.
warnings.filterwarnings('ignore')
sns.set_theme(style="whitegrid", palette="muted")
plt.rcParams["figure.figsize"] = (10, 6)
RANDOM_STATE = 42
CV_FOLDS = 5

# Start-up banner, emitted as four newline-separated lines.
print(
    "=" * 60,
    " STUDENT PERFORMANCE CLASSIFICATION",
    " Multi-Class Grade Prediction from Academic Factors",
    "=" * 60,
    sep="\n",
)
58
+
59
+
60
+ # =============================================================================
61
+ # 2. DATA LOADING AND INITIAL INSPECTION
62
+ # =============================================================================
63
+
64
def load_and_inspect_data(filepath: str) -> pd.DataFrame:
    """Load a CSV dataset and print an overview of its contents.

    Prints the shape, column names, dtypes, per-column missing-value counts
    and ``describe()`` statistics. If the frame has an
    'Extracurricular Activities' column its value counts are printed as well;
    when the column is absent a short note is printed instead of raising
    ``KeyError``.

    Args:
        filepath: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, unmodified.
    """
    df = pd.read_csv(filepath)

    print("\n📊 DATASET OVERVIEW")
    print("-" * 40)
    print(f"Shape: {df.shape[0]:,} rows × {df.shape[1]} columns")
    print(f"\nColumns: {list(df.columns)}")
    print(f"\nData Types:\n{df.dtypes}")
    print(f"\nMissing Values:\n{df.isnull().sum()}")
    print(f"\nBasic Statistics:\n{df.describe()}")

    # Check categorical column (guarded so inspection works on any CSV)
    categorical_col = 'Extracurricular Activities'
    print(f"\nExtracurricular Activities Distribution:")
    if categorical_col in df.columns:
        print(df[categorical_col].value_counts())
    else:
        print(f"(column '{categorical_col}' not found)")

    return df
82
+
83
# Read the dataset (path is relative to the working directory) and preview it.
df = load_and_inspect_data('Student_Performance.csv')
print("\nFirst 10 rows:", df.head(10), sep="\n")
87
+
88
+
89
+ # =============================================================================
90
+ # 3. TARGET VARIABLE CREATION
91
+ # =============================================================================
92
+
93
def create_grade_labels(performance_index: pd.Series, bins=None, labels=None) -> pd.Series:
    """
    Convert continuous Performance Index to letter grades.

    Default Grading Scale (lower bound inclusive, upper bound exclusive):
        A: 90-100
        B: 80-89
        C: 70-79
        D: 60-69
        F: 0-59

    Args:
        performance_index: Numeric scores to bin.
        bins: Optional ascending bin edges. Defaults to
            [0, 60, 70, 80, 90, 101]; the final edge is 101 so that a score
            of exactly 100 still falls inside the top (left-closed) interval.
        labels: Optional labels, one per interval (len(bins) - 1). Defaults
            to ['F', 'D', 'C', 'B', 'A'].

    Returns:
        A categorical Series of grade labels aligned with the input. Scores
        outside [bins[0], bins[-1]) are returned as NaN.
    """
    if bins is None:
        bins = [0, 60, 70, 80, 90, 101]
    if labels is None:
        labels = ['F', 'D', 'C', 'B', 'A']

    # right=False makes each interval [low, high), so e.g. 60 is a D, not an F.
    grades = pd.cut(
        performance_index,
        bins=list(bins),
        labels=list(labels),
        right=False,
        include_lowest=True
    )

    return grades
116
+
117
# Create target variable
df['grade'] = create_grade_labels(df['Performance Index'])

print("\n🎯 TARGET VARIABLE CREATED")
print("-" * 40)
print("Grade Distribution:")
# 'grade' comes from pd.cut, so it is categorical and value_counts() also
# reports grades that have zero rows.
grade_counts = df['grade'].value_counts().sort_index()
total_rows = max(len(df), 1)  # avoid dividing by zero on an empty dataset
for grade in ['A', 'B', 'C', 'D', 'F']:
    count = grade_counts.get(grade, 0)
    pct = count / total_rows * 100
    bar = "█" * int(pct / 2)
    print(f" {grade}: {count:>5} ({pct:>5.2f}%) {bar}")

# Check imbalance.
# Only grades that actually occur take part in the ratio; an empty grade would
# otherwise make the minimum 0 and turn the ratio into a division by zero.
observed_counts = grade_counts[grade_counts > 0]
missing_grades = [str(g) for g in grade_counts.index[grade_counts == 0]]
if missing_grades:
    print(f"\nGrades with no samples: {', '.join(missing_grades)}")

if len(observed_counts) > 0:
    imbalance_ratio = observed_counts.max() / observed_counts.min()
else:
    imbalance_ratio = float('nan')  # no graded rows at all

print(f"\nImbalance Ratio: {imbalance_ratio:.2f}")
if imbalance_ratio > 10:
    print("⚠️ Significant imbalance - will use class weights")
else:
    print("✅ Classes are reasonably balanced")
137
+
138
+
139
+ # =============================================================================
140
+ # 4. EXPLORATORY DATA ANALYSIS
141
+ # =============================================================================
142
+
143
+ def perform_eda(df: pd.DataFrame):
144
+ """Comprehensive exploratory data analysis."""
145
+
146
+ print("\n📈 EXPLORATORY DATA ANALYSIS")
147
+ print("=" * 60)
148
+
149
+ # Define feature groups
150
+ numerical_features = [
151
+ 'Hours Studied',
152
+ 'Previous Scores',
153
+ 'Sleep Hours',
154
+ 'Sample Question Papers Practiced'
155
+ ]
156
+ categorical_features = ['Extracurricular Activities']
157
+
158
+ # 4.1 Numerical Feature Distributions
159
+ fig, axes = plt.subplots(2, 2, figsize=(14, 10))
160
+ axes = axes.flatten()
161
+
162
+ for i, col in enumerate(numerical_features):
163
+ sns.histplot(df[col], kde=True, ax=axes[i], color='teal', bins=30)
164
+ axes[i].axvline(df[col].mean(), color='red', linestyle='--',
165
+ label=f'Mean: {df[col].mean():.1f}')
166
+ axes[i].axvline(df[col].median(), color='orange', linestyle='--',
167
+ label=f'Median: {df[col].median():.1f}')
168
+ axes[i].set_title(f'Distribution of {col}')
169
+ axes[i].legend()
170
+
171
+ plt.tight_layout()
172
+ plt.savefig('01_feature_distributions.png', dpi=150, bbox_inches='tight')
173
+ plt.show()
174
+
175
+ # 4.2 Target Distribution
176
+ fig, axes = plt.subplots(1, 2, figsize=(14, 5))
177
+
178
+ grade_order = ['A', 'B', 'C', 'D', 'F']
179
+ grade_counts = df['grade'].value_counts().reindex(grade_order)
180
+
181
+ colors = sns.color_palette('RdYlGn_r', 5)
182
+
183
+ # Bar chart
184
+ bars = axes[0].bar(grade_order, grade_counts.values, color=colors)
185
+ axes[0].set_title('Grade Distribution', fontsize=14)
186
+ axes[0].set_xlabel('Grade')
187
+ axes[0].set_ylabel('Count')
188
+ for bar, count in zip(bars, grade_counts.values):
189
+ axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 20,
190
+ f'{count}', ha='center', fontsize=11)
191
+
192
+ # Pie chart
193
+ axes[1].pie(grade_counts, labels=grade_order, autopct='%1.1f%%',
194
+ colors=colors, explode=[0.02]*5)
195
+ axes[1].set_title('Grade Distribution (%)', fontsize=14)
196
+
197
+ plt.tight_layout()
198
+ plt.savefig('02_grade_distribution.png', dpi=150, bbox_inches='tight')
199
+ plt.show()
200
+
201
+ # 4.3 Performance Index Distribution (before binning)
202
+ plt.figure(figsize=(12, 5))
203
+ sns.histplot(df['Performance Index'], kde=True, bins=50, color='steelblue')
204
+
205
+ # Add grade boundary lines
206
+ boundaries = [60, 70, 80, 90]
207
+ boundary_labels = ['F/D', 'D/C', 'C/B', 'B/A']
208
+ for bound, label in zip(boundaries, boundary_labels):
209
+ plt.axvline(bound, color='red', linestyle='--', alpha=0.7)
210
+ plt.text(bound + 1, plt.gca().get_ylim()[1] * 0.9, label, fontsize=10)
211
+
212
+ plt.title('Performance Index Distribution with Grade Boundaries')
213
+ plt.xlabel('Performance Index')
214
+ plt.savefig('03_performance_index_distribution.png', dpi=150, bbox_inches='tight')
215
+ plt.show()
216
+
217
+ # 4.4 Features by Grade (Box Plots)
218
+ fig, axes = plt.subplots(2, 2, figsize=(14, 10))
219
+ axes = axes.flatten()
220
+
221
+ for i, col in enumerate(numerical_features):
222
+ sns.boxplot(data=df, x='grade', y=col, order=grade_order,
223
+ hue='grade', palette='RdYlGn_r', legend=False, ax=axes[i])
224
+ axes[i].set_title(f'{col} by Grade')
225
+
226
+ plt.tight_layout()
227
+ plt.savefig('04_features_by_grade.png', dpi=150, bbox_inches='tight')
228
+ plt.show()
229
+
230
+ # 4.5 Extracurricular Activities Analysis
231
+ fig, axes = plt.subplots(1, 2, figsize=(14, 5))
232
+
233
+ # Grade distribution by extracurricular
234
+ ct = pd.crosstab(df['Extracurricular Activities'], df['grade'], normalize='index') * 100
235
+ ct = ct[grade_order]
236
+ ct.plot(kind='bar', ax=axes[0], color=colors, edgecolor='black')
237
+ axes[0].set_title('Grade Distribution by Extracurricular Activities')
238
+ axes[0].set_ylabel('Percentage')
239
+ axes[0].set_xticklabels(['No', 'Yes'], rotation=0)
240
+ axes[0].legend(title='Grade', bbox_to_anchor=(1.02, 1))
241
+
242
+ # Performance Index by extracurricular
243
+ sns.boxplot(data=df, x='Extracurricular Activities', y='Performance Index',
244
+ hue='Extracurricular Activities', palette='Set2', legend=False, ax=axes[1])
245
+ axes[1].set_title('Performance Index by Extracurricular Activities')
246
+
247
+ plt.tight_layout()
248
+ plt.savefig('05_extracurricular_analysis.png', dpi=150, bbox_inches='tight')
249
+ plt.show()
250
+
251
+ # 4.6 Correlation Analysis
252
+ plt.figure(figsize=(10, 8))
253
+
254
+ # Create correlation matrix (encode extracurricular for correlation)
255
+ df_corr = df.copy()
256
+ df_corr['Extracurricular (encoded)'] = (df_corr['Extracurricular Activities'] == 'Yes').astype(int)
257
+
258
+ corr_cols = numerical_features + ['Extracurricular (encoded)', 'Performance Index']
259
+ corr_matrix = df_corr[corr_cols].corr()
260
+
261
+ mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
262
+ sns.heatmap(corr_matrix, annot=True, cmap='RdBu_r', center=0,
263
+ mask=mask, square=True, linewidths=0.5, fmt='.2f')
264
+ plt.title('Feature Correlation Heatmap')
265
+ plt.tight_layout()
266
+ plt.savefig('06_correlation_heatmap.png', dpi=150, bbox_inches='tight')
267
+ plt.show()
268
+
269
+ # 4.7 Pairplot for key relationships
270
+ print("\nGenerating pairplot (this may take a moment)...")
271
+ key_features = ['Hours Studied', 'Previous Scores', 'Performance Index']
272
+ sample_df = df.sample(n=min(2000, len(df)), random_state=RANDOM_STATE)
273
+
274
+ g = sns.pairplot(sample_df, vars=key_features, hue='grade',
275
+ hue_order=grade_order, palette='RdYlGn_r',
276
+ diag_kind='kde', plot_kws={'alpha': 0.6})
277
+ g.fig.suptitle('Feature Relationships by Grade', y=1.02)
278
+ plt.savefig('07_pairplot.png', dpi=150, bbox_inches='tight')
279
+ plt.show()
280
+
281
+ # 4.8 Print correlation insights
282
+ print("\n📊 CORRELATION INSIGHTS")
283
+ print("-" * 40)
284
+ perf_corr = corr_matrix['Performance Index'].drop('Performance Index').sort_values(ascending=False)
285
+ print("Correlation with Performance Index:")
286
+ for feat, corr in perf_corr.items():
287
+ indicator = "↑↑" if corr > 0.5 else "↑" if corr > 0.3 else "→" if corr > -0.3 else "↓"
288
+ print(f" {indicator} {feat}: {corr:.3f}")
289
+
290
+ perform_eda(df)
291
+
292
+
293
+ # =============================================================================
294
+ # 5. DATA PREPROCESSING
295
+ # =============================================================================
296
+
297
class StudentDataPreprocessor:
    """Feature and target preprocessing for the student-performance data.

    Standard-scales the numerical columns, one-hot encodes the categorical
    column (dropping the first category), label-encodes the ``grade`` target
    and computes balanced class weights.
    """

    def __init__(self):
        self.numerical_features = [
            'Hours Studied',
            'Previous Scores',
            'Sleep Hours',
            'Sample Question Papers Practiced'
        ]
        self.categorical_features = ['Extracurricular Activities']
        self.all_features = self.numerical_features + self.categorical_features

        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        self.onehot_encoder = OneHotEncoder(drop='first', sparse_output=False)

        # Populated by fit_transform(); they stay None until then so that
        # every fitted attribute exists on a fresh instance.
        self.grade_mapping = None
        self.class_weights = None
        self.feature_names = None
        self.is_fitted = False

    def fit_transform(self, df: pd.DataFrame):
        """Fit all preprocessors on ``df`` and return ``(X, y_encoded)``.

        ``df`` must contain the numerical and categorical feature columns
        plus a ``grade`` column. Every statistic (scaling, categories, class
        weights) is learned from the rows passed in.

        Returns:
            tuple: the combined feature matrix (scaled numerical columns
            followed by the one-hot columns) and the label-encoded target.
        """

        # Extract features
        X_numerical = df[self.numerical_features].copy()
        X_categorical = df[self.categorical_features].copy()
        y = df['grade'].copy()

        # Encode target
        y_encoded = self.label_encoder.fit_transform(y)
        self.grade_mapping = dict(zip(
            self.label_encoder.classes_,
            self.label_encoder.transform(self.label_encoder.classes_)
        ))

        # Compute class weights
        classes = np.unique(y_encoded)
        weights = compute_class_weight('balanced', classes=classes, y=y_encoded)
        self.class_weights = dict(zip(classes, weights))

        # Scale numerical features
        X_numerical_scaled = self.scaler.fit_transform(X_numerical)

        # Encode categorical features
        X_categorical_encoded = self.onehot_encoder.fit_transform(X_categorical)

        # Combine features
        X_combined = np.hstack([X_numerical_scaled, X_categorical_encoded])

        # Get feature names for later
        cat_feature_names = self.onehot_encoder.get_feature_names_out(self.categorical_features)
        self.feature_names = self.numerical_features + list(cat_feature_names)

        self.is_fitted = True

        print("\n🔧 PREPROCESSING COMPLETE")
        print("-" * 40)
        print(f"Numerical features: {self.numerical_features}")
        print(f"Categorical features: {self.categorical_features}")
        print(f"Total features after encoding: {len(self.feature_names)}")
        print(f"\nFeature names: {self.feature_names}")
        print(f"\nTarget Mapping: {self.grade_mapping}")
        print(f"\nClass Weights:")
        for cls, weight in self.class_weights.items():
            grade = self.get_grade_from_encoding(cls)
            print(f" {grade}: {weight:.4f}")

        return X_combined, y_encoded

    def transform(self, df: pd.DataFrame):
        """Transform new rows with the already-fitted scaler and encoder.

        Raises:
            ValueError: if ``fit_transform`` has not been called yet.
        """
        if not self.is_fitted:
            raise ValueError("Preprocessor must be fitted before transforming.")

        X_numerical = df[self.numerical_features].copy()
        X_categorical = df[self.categorical_features].copy()

        X_numerical_scaled = self.scaler.transform(X_numerical)
        X_categorical_encoded = self.onehot_encoder.transform(X_categorical)

        return np.hstack([X_numerical_scaled, X_categorical_encoded])

    def transform_single(self, hours_studied, previous_scores, sleep_hours,
                         sample_papers, extracurricular):
        """Transform one sample, given as scalars, into a 1-row feature matrix.

        Raises:
            ValueError: if ``fit_transform`` has not been called yet.
        """
        if not self.is_fitted:
            raise ValueError("Preprocessor must be fitted before transforming.")

        df = pd.DataFrame({
            'Hours Studied': [hours_studied],
            'Previous Scores': [previous_scores],
            'Sleep Hours': [sleep_hours],
            'Sample Question Papers Practiced': [sample_papers],
            'Extracurricular Activities': [extracurricular]
        })

        return self.transform(df)

    def get_grade_from_encoding(self, encoding: int) -> str:
        """Return the grade label for a numeric class encoding.

        Raises:
            ValueError: if the target mapping has not been fitted yet.
            KeyError: if ``encoding`` is not a known class id.
        """
        if self.grade_mapping is None:
            # Same exception type the transform methods use when unfitted,
            # instead of an AttributeError on None.
            raise ValueError("Preprocessor must be fitted before decoding grades.")
        inv_map = {v: k for k, v in self.grade_mapping.items()}
        return inv_map[encoding]

    def save(self, filepath: str):
        """Save preprocessor to disk."""
        joblib.dump(self, filepath)

    @staticmethod
    def load(filepath: str):
        """Load preprocessor from disk.

        NOTE(review): joblib files are pickle-based, so loading one can run
        arbitrary code - only load artifacts from a trusted source.
        """
        return joblib.load(filepath)
409
+
410
# Initialize preprocessor (it is fitted below, on the training rows only)
preprocessor = StudentDataPreprocessor()


# =============================================================================
# 6. TRAIN/TEST SPLIT
# =============================================================================

# Split the raw rows first. Fitting the scaler, encoder and class weights on
# the full dataset before splitting lets test-set statistics leak into the
# training features.
train_df, test_df = train_test_split(
    df,
    test_size=0.20,
    random_state=RANDOM_STATE,
    stratify=df['grade']
)

X_train, y_train = preprocessor.fit_transform(train_df)
X_test = preprocessor.transform(test_df)
y_test = preprocessor.label_encoder.transform(test_df['grade'])

# Full encoded dataset (training rows followed by test rows). Kept because
# this script previously exposed X and y at module level.
X = np.vstack([X_train, X_test])
y = np.concatenate([y_train, y_test])

print("\n📂 DATA SPLIT")
print("-" * 40)
print(f"Training set: {X_train.shape[0]:,} samples ({X_train.shape[0]/len(y)*100:.1f}%)")
print(f"Testing set: {X_test.shape[0]:,} samples ({X_test.shape[0]/len(y)*100:.1f}%)")
print(f"Features: {X_train.shape[1]}")

print(f"\nTraining set class distribution:")
unique, counts = np.unique(y_train, return_counts=True)
for u, c in zip(unique, counts):
    print(f" {preprocessor.get_grade_from_encoding(u)}: {c:,} ({c/len(y_train)*100:.1f}%)")
436
+
437
+
438
+ # =============================================================================
439
+ # 7. MODEL TRAINING WITH CROSS-VALIDATION
440
+ # =============================================================================
441
+
442
def cross_validate_model(model, X, y, cv_folds: int = 5, model_name: str = "Model"):
    """Cross-validate ``model`` and report accuracy, macro F1 and weighted F1.

    All three metrics are scored in a single stratified, shuffled K-fold
    pass, so each fold is fitted once instead of once per metric.

    Parameters:
        model: unfitted estimator to evaluate.
        X, y: training features and encoded labels.
        cv_folds: number of stratified folds.
        model_name: label used in the printed report and the result dict.

    Returns:
        dict with ``model_name`` plus ``<metric>_mean`` / ``<metric>_std``
        for ``accuracy``, ``f1_macro`` and ``f1_weighted``.
    """
    # Local import: cross_validate is not otherwise used by this block.
    from sklearn.model_selection import cross_validate

    cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=RANDOM_STATE)

    metrics = ['accuracy', 'f1_macro', 'f1_weighted']
    fold_scores = cross_validate(model, X, y, cv=cv, scoring=metrics, n_jobs=-1)

    results = {'model_name': model_name}
    for metric in metrics:
        # cross_validate exposes per-fold test scores under 'test_<metric>'.
        scores = fold_scores[f'test_{metric}']
        results[f'{metric}_mean'] = scores.mean()
        results[f'{metric}_std'] = scores.std()

    print(f"\n{model_name} - {cv_folds}-Fold Cross-Validation:")
    print(f" Accuracy: {results['accuracy_mean']:.4f} ± {results['accuracy_std']:.4f}")
    print(f" F1 (Macro): {results['f1_macro_mean']:.4f} ± {results['f1_macro_std']:.4f}")
    print(f" F1 (Weight): {results['f1_weighted_mean']:.4f} ± {results['f1_weighted_std']:.4f}")

    return results
467
+
468
print("\n🤖 MODEL TRAINING WITH CROSS-VALIDATION")
print("=" * 60)

# Candidate estimators, keyed by the display name used in every report.
logistic_regression = LogisticRegression(
    solver='lbfgs',
    max_iter=1000,
    random_state=RANDOM_STATE,
    class_weight='balanced',
    n_jobs=-1
)
random_forest = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    random_state=RANDOM_STATE,
    class_weight='balanced',
    n_jobs=-1
)
gradient_boosting = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=RANDOM_STATE
)

models = {
    'Logistic Regression': logistic_regression,
    'Random Forest': random_forest,
    'Gradient Boosting': gradient_boosting
}

# Score every candidate on the training split, in declaration order.
cv_results = {
    name: cross_validate_model(model, X_train, y_train, CV_FOLDS, name)
    for name, model in models.items()
}
498
+
499
+
500
+ # =============================================================================
501
+ # 8. HYPERPARAMETER TUNING
502
+ # =============================================================================
503
+
504
print("\n🔍 HYPERPARAMETER TUNING")
print("=" * 60)

# --- Random Forest -----------------------------------------------------------
print("\nTuning Random Forest...")
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 15, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf_base_estimator = RandomForestClassifier(
    random_state=RANDOM_STATE, class_weight='balanced', n_jobs=-1
)
rf_grid = GridSearchCV(
    rf_base_estimator,
    rf_param_grid,
    cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE),
    scoring='f1_macro',
    n_jobs=-1,
    verbose=1
)
rf_grid.fit(X_train, y_train)

print(f"\nRandom Forest Best Parameters: {rf_grid.best_params_}")
print(f"Random Forest Best CV F1 (Macro): {rf_grid.best_score_:.4f}")

# --- Gradient Boosting -------------------------------------------------------
print("\nTuning Gradient Boosting...")
gb_param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.05, 0.1, 0.2],
    'min_samples_split': [2, 5]
}

gb_base_estimator = GradientBoostingClassifier(random_state=RANDOM_STATE)
gb_grid = GridSearchCV(
    gb_base_estimator,
    gb_param_grid,
    cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE),
    scoring='f1_macro',
    n_jobs=-1,
    verbose=1
)
gb_grid.fit(X_train, y_train)

print(f"\nGradient Boosting Best Parameters: {gb_grid.best_params_}")
print(f"Gradient Boosting Best CV F1 (Macro): {gb_grid.best_score_:.4f}")

# --- Pick the tuned model with the highest CV macro-F1 -----------------------
# Each entry is (fitted best estimator, best CV score); on a tie the first
# entry wins.
best_models = {
    'Random Forest': (rf_grid.best_estimator_, rf_grid.best_score_),
    'Gradient Boosting': (gb_grid.best_estimator_, gb_grid.best_score_)
}

best_model_name, (best_model, _) = max(
    best_models.items(), key=lambda entry: entry[1][1]
)

print(f"\n🏆 Best Model: {best_model_name}")
561
+
562
+
563
+ # =============================================================================
564
+ # 9. FINAL MODEL EVALUATION
565
+ # =============================================================================
566
+
567
def comprehensive_evaluation(model, X_test, y_test, preprocessor, model_name: str):
    """Evaluate a fitted classifier on the test set, with report and plots.

    Prints accuracy, macro/weighted F1 and the per-class classification
    report, then saves and shows raw and row-normalised confusion matrices.

    Parameters:
        model: fitted classifier exposing ``predict`` and ``predict_proba``.
        X_test, y_test: test features and encoded labels.
        preprocessor: fitted preprocessor; its ``label_encoder.classes_``
            supplies the class names.
        model_name: label used in titles and in the output file name.

    Returns:
        dict with ``accuracy``, ``f1_macro``, ``f1_weighted``, ``y_pred``
        and ``y_proba``.
    """

    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    f1_macro = f1_score(y_test, y_pred, average='macro')
    f1_weighted = f1_score(y_test, y_pred, average='weighted')

    class_names = preprocessor.label_encoder.classes_
    # Pin the label set to every encoded class, so the report and matrices
    # keep one row per class name even if a class is absent from y_test and
    # y_pred.
    label_ids = np.arange(len(class_names))

    print(f"\n{'='*60}")
    print(f"📊 {model_name} - TEST SET EVALUATION")
    print(f"{'='*60}")
    print(f"\nOverall Metrics:")
    print(f" Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f" F1 Score (Macro): {f1_macro:.4f}")
    print(f" F1 Score (Weight): {f1_weighted:.4f}")

    print(f"\nDetailed Classification Report:")
    print(classification_report(
        y_test, y_pred,
        labels=label_ids,
        target_names=class_names,
        zero_division=0
    ))

    # Confusion Matrices
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    cm = confusion_matrix(y_test, y_pred, labels=label_ids)
    disp = ConfusionMatrixDisplay(
        confusion_matrix=cm,
        display_labels=class_names
    )
    disp.plot(cmap='Blues', ax=axes[0])
    axes[0].set_title(f'Confusion Matrix - {model_name}')

    # Row-normalise; a class with no true samples gets a row of zeros
    # instead of a division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm.astype('float'), row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0
    )
    disp_norm = ConfusionMatrixDisplay(
        confusion_matrix=cm_normalized,
        display_labels=class_names
    )
    disp_norm.plot(cmap='Blues', ax=axes[1], values_format='.2%')
    axes[1].set_title(f'Normalized Confusion Matrix - {model_name}')

    plt.tight_layout()
    plt.savefig(f'08_confusion_matrix_{model_name.lower().replace(" ", "_")}.png',
                dpi=150, bbox_inches='tight')
    plt.show()

    return {
        'accuracy': accuracy,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        'y_pred': y_pred,
        'y_proba': y_proba
    }
623
+
624
+ final_results = comprehensive_evaluation(best_model, X_test, y_test, preprocessor, best_model_name)
625
+
626
+
627
+ # =============================================================================
628
+ # 10. FEATURE IMPORTANCE ANALYSIS
629
+ # =============================================================================
630
+
631
def plot_feature_importance(model, feature_names: list, model_name: str):
    """Print and plot the model's feature importances, largest first.

    Models without a ``feature_importances_`` attribute are reported and
    skipped. Otherwise the ranking is printed and a horizontal bar chart is
    saved to ``09_feature_importance.png`` and shown.
    """

    if not hasattr(model, 'feature_importances_'):
        print("Model doesn't support feature importance extraction.")
        return

    importances = model.feature_importances_

    # Feature positions sorted by descending importance.
    order = np.argsort(importances)[::-1]
    ranked_values = importances[order]
    ranked_names = [feature_names[pos] for pos in order]

    print(f"\n📊 Feature Importance - {model_name}")
    print("-" * 40)
    for rank, (name, value) in enumerate(zip(ranked_names, ranked_values), start=1):
        print(f" {rank}. {name}: {value:.4f} ({value*100:.1f}%)")

    plt.figure(figsize=(10, 6))
    palette = sns.color_palette('viridis', len(feature_names))
    positions = range(len(order))
    bars = plt.barh(positions, ranked_values, color=palette)
    plt.yticks(positions, ranked_names)
    plt.xlabel('Feature Importance')
    plt.title(f'Feature Importance - {model_name}')
    plt.gca().invert_yaxis()  # most important feature at the top

    # Annotate each bar with its numeric importance.
    for bar, value in zip(bars, ranked_values):
        label_x = bar.get_width() + 0.01
        label_y = bar.get_y() + bar.get_height()/2
        plt.text(label_x, label_y, f'{value:.3f}', va='center', fontsize=10)

    plt.tight_layout()
    plt.savefig('09_feature_importance.png', dpi=150, bbox_inches='tight')
    plt.show()
662
+
663
+ plot_feature_importance(best_model, preprocessor.feature_names, best_model_name)
664
+
665
+
666
+ # =============================================================================
667
+ # 11. LEARNING CURVES
668
+ # =============================================================================
669
+
670
def plot_learning_curves(model, X, y, model_name: str):
    """Plot macro-F1 learning curves and print a bias/variance diagnosis.

    Scores are computed with 5-fold stratified CV at ten training-set sizes
    from 10% to 100%. The figure is saved to ``10_learning_curves.png`` and
    shown.
    """

    print(f"\nGenerating learning curves for {model_name}...")

    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    fractions = np.linspace(0.1, 1.0, 10)
    train_sizes, train_scores, val_scores = learning_curve(
        model, X, y,
        train_sizes=fractions,
        cv=folds,
        scoring='f1_macro',
        n_jobs=-1
    )

    # Mean and spread across folds for every training-set size.
    train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
    val_mean, val_std = val_scores.mean(axis=1), val_scores.std(axis=1)

    plt.figure(figsize=(10, 6))
    plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std,
                     alpha=0.1, color='blue')
    plt.fill_between(train_sizes, val_mean - val_std, val_mean + val_std,
                     alpha=0.1, color='orange')
    plt.plot(train_sizes, train_mean, 'o-', color='blue', label='Training Score')
    plt.plot(train_sizes, val_mean, 'o-', color='orange', label='Validation Score')
    plt.xlabel('Training Set Size')
    plt.ylabel('F1 Score (Macro)')
    plt.title(f'Learning Curves - {model_name}')
    plt.legend(loc='lower right')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('10_learning_curves.png', dpi=150, bbox_inches='tight')
    plt.show()

    # Compare the scores at the largest training size.
    last_train = train_mean[-1]
    last_val = val_mean[-1]
    final_gap = last_train - last_val
    print(f"\n📈 Learning Curve Analysis:")
    print(f" Final Training Score: {last_train:.4f}")
    print(f" Final Validation Score: {last_val:.4f}")
    print(f" Gap: {final_gap:.4f}")

    if final_gap > 0.1:
        verdict = " ⚠️ High variance - model may be overfitting"
    elif last_val < 0.6:
        verdict = " ⚠️ High bias - model may be underfitting"
    else:
        verdict = " ✅ Model appears well-balanced"
    print(verdict)
716
+
717
# Build an unfitted copy of the winning configuration for the learning curves.
if best_model_name != 'Random Forest':
    model_for_curves = GradientBoostingClassifier(
        random_state=RANDOM_STATE,
        **gb_grid.best_params_
    )
else:
    model_for_curves = RandomForestClassifier(
        random_state=RANDOM_STATE,
        class_weight='balanced',
        n_jobs=-1,
        **rf_grid.best_params_
    )

plot_learning_curves(model_for_curves, X_train, y_train, best_model_name)
728
+
729
+
730
+ # =============================================================================
731
+ # 12. MODEL COMPARISON SUMMARY
732
+ # =============================================================================
733
+
734
def create_comparison_summary(cv_results: dict, best_model_name: str, final_accuracy: float):
    """Print and plot a cross-validation comparison of all candidate models.

    Parameters:
        cv_results: mapping of model name to the metrics dict returned by
            ``cross_validate_model``.
        best_model_name: name of the model selected after tuning.
        final_accuracy: test-set accuracy of the selected model.

    The table and the selected model are printed; a grouped bar chart of CV
    accuracy and macro F1 is saved to ``11_model_comparison.png`` and shown.
    """

    print("\n" + "=" * 60)
    print("📋 MODEL COMPARISON SUMMARY")
    print("=" * 60)

    summary_data = [
        {
            'Model': name,
            'CV Accuracy': f"{results['accuracy_mean']:.4f} ± {results['accuracy_std']:.4f}",
            'CV F1 (Macro)': f"{results['f1_macro_mean']:.4f} ± {results['f1_macro_std']:.4f}",
            'CV F1 (Weighted)': f"{results['f1_weighted_mean']:.4f} ± {results['f1_weighted_std']:.4f}"
        }
        for name, results in cv_results.items()
    ]

    summary_df = pd.DataFrame(summary_data)
    print(summary_df.to_string(index=False))

    # Report the selected model; these two arguments were previously
    # accepted but never used.
    print(f"\nSelected model: {best_model_name} (test accuracy: {final_accuracy:.4f})")

    # Visualization
    fig, ax = plt.subplots(figsize=(12, 6))

    x = np.arange(len(cv_results))
    width = 0.35

    accuracies = [r['accuracy_mean'] for r in cv_results.values()]
    f1_scores = [r['f1_macro_mean'] for r in cv_results.values()]

    bars1 = ax.bar(x - width/2, accuracies, width, label='Accuracy', color='steelblue')
    bars2 = ax.bar(x + width/2, f1_scores, width, label='F1 (Macro)', color='darkorange')

    ax.set_ylabel('Score')
    ax.set_title('Model Comparison - Cross-Validation Results')
    ax.set_xticks(x)
    ax.set_xticklabels(list(cv_results.keys()))
    ax.legend()
    ax.set_ylim(0, 1.0)

    # Label every bar with its score.
    for bar in list(bars1) + list(bars2):
        height = bar.get_height()
        ax.annotate(f'{height:.3f}', xy=(bar.get_x() + bar.get_width()/2, height),
                    xytext=(0, 3), textcoords="offset points", ha='center', fontsize=9)

    plt.tight_layout()
    plt.savefig('11_model_comparison.png', dpi=150, bbox_inches='tight')
    plt.show()
780
+
781
+ create_comparison_summary(cv_results, best_model_name, final_results['accuracy'])
782
+
783
+
784
+ # =============================================================================
785
+ # 13. AGENT-READY PREDICTION CLASS
786
+ # =============================================================================
787
+
788
class StudentGradePredictor:
    """
    Production-ready grade prediction class for agent integration.

    Wraps a fitted classifier and a fitted preprocessor: validates raw
    inputs, predicts a grade with per-class probabilities and attaches a
    textual recommendation.
    """

    def __init__(self, model, preprocessor: "StudentDataPreprocessor"):
        self.model = model
        self.preprocessor = preprocessor
        self.grade_order = ['A', 'B', 'C', 'D', 'F']

        # Inclusive (min, max) bounds for numeric inputs and the allowed
        # values for the categorical input.
        self.valid_ranges = {
            'hours_studied': (0, 50),
            'previous_scores': (0, 100),
            'sleep_hours': (0, 24),
            'sample_papers': (0, 20),
            'extracurricular': ['Yes', 'No']
        }

    def validate_input(self, hours_studied, previous_scores, sleep_hours,
                       sample_papers, extracurricular) -> tuple:
        """Validate input values.

        Returns:
            tuple: ``(True, "Valid")`` or ``(False, message)`` where
            ``message`` joins every problem found with ``"; "``.
        """

        errors = []

        # Check numerical ranges
        checks = [
            ('hours_studied', hours_studied, self.valid_ranges['hours_studied']),
            ('previous_scores', previous_scores, self.valid_ranges['previous_scores']),
            ('sleep_hours', sleep_hours, self.valid_ranges['sleep_hours']),
            ('sample_papers', sample_papers, self.valid_ranges['sample_papers']),
        ]

        for name, value, (min_val, max_val) in checks:
            try:
                in_range = min_val <= value <= max_val
            except TypeError:
                # None, strings and other non-comparable values are reported
                # as validation errors rather than crashing the caller.
                errors.append(f"{name} must be a number (got {value!r})")
                continue
            if not in_range:
                errors.append(f"{name} must be between {min_val} and {max_val} (got {value})")

        # Check categorical
        if extracurricular not in self.valid_ranges['extracurricular']:
            errors.append(f"extracurricular must be 'Yes' or 'No' (got {extracurricular})")

        if errors:
            return False, "; ".join(errors)
        return True, "Valid"

    def predict(self, hours_studied: float, previous_scores: float,
                sleep_hours: float, sample_papers: int,
                extracurricular: str) -> dict:
        """
        Make a grade prediction with confidence scores.

        Parameters:
        -----------
        hours_studied : float - Total hours spent studying (0-50)
        previous_scores : float - Previous test scores (0-100)
        sleep_hours : float - Average daily sleep hours (0-24)
        sample_papers : int - Number of practice papers completed (0-20)
        extracurricular : str - Participates in extracurricular activities ('Yes'/'No')

        Returns:
        --------
        dict : Prediction results. On invalid input ``success`` is False and
            ``error`` carries the validation message; otherwise the dict
            holds the predicted grade, confidence (percent), confidence
            level, per-grade probabilities (percent), an echo of the
            inputs, a recommendation and a disclaimer.
        """

        # Validate input
        is_valid, message = self.validate_input(
            hours_studied, previous_scores, sleep_hours, sample_papers, extracurricular
        )
        if not is_valid:
            return {
                'success': False,
                'error': message,
                'predicted_grade': None,
                'confidence': None
            }

        # Transform input
        X = self.preprocessor.transform_single(
            hours_studied, previous_scores, sleep_hours,
            sample_papers, extracurricular
        )

        # Predict
        prediction = self.model.predict(X)[0]
        probabilities = self.model.predict_proba(X)[0]

        # predict_proba columns follow model.classes_, which need not be
        # 0..n-1 if the model never saw some class; look probabilities up
        # by encoded label rather than by position.
        class_ids = getattr(self.model, 'classes_', range(len(probabilities)))
        proba_by_class = dict(zip(class_ids, probabilities))

        predicted_grade = self.preprocessor.get_grade_from_encoding(prediction)
        confidence = proba_by_class[prediction]

        # Probability distribution (0.0 for classes unknown to the model)
        prob_distribution = {}
        for i, grade in enumerate(self.preprocessor.label_encoder.classes_):
            prob_distribution[grade] = round(proba_by_class.get(i, 0.0) * 100, 2)

        # Generate insights
        recommendation = self._generate_recommendation(
            predicted_grade, confidence, hours_studied, previous_scores,
            sleep_hours, sample_papers, extracurricular
        )

        confidence_level = self._get_confidence_level(confidence)

        return {
            'success': True,
            'predicted_grade': predicted_grade,
            'confidence': round(confidence * 100, 2),
            'confidence_level': confidence_level,
            'probability_distribution': prob_distribution,
            'input_summary': {
                'hours_studied': hours_studied,
                'previous_scores': previous_scores,
                'sleep_hours': sleep_hours,
                'sample_papers': sample_papers,
                'extracurricular': extracurricular
            },
            'recommendation': recommendation,
            'disclaimer': (
                "This prediction is based on statistical patterns and should inform, "
                "not replace, professional educator judgment."
            )
        }

    def _get_confidence_level(self, confidence: float) -> str:
        """Bucket a 0-1 confidence into HIGH (>= 0.7), MODERATE (>= 0.4) or LOW."""
        if confidence >= 0.7:
            return "HIGH"
        elif confidence >= 0.4:
            return "MODERATE"
        else:
            return "LOW"

    def _generate_recommendation(self, grade, confidence, hours_studied,
                                 previous_scores, sleep_hours, sample_papers,
                                 extracurricular):
        """Generate actionable recommendations.

        Returns the applicable messages for the predicted grade, plus a
        low-confidence warning when ``confidence`` is below 0.4, joined by
        single spaces.
        """

        recommendations = []

        if grade in ['D', 'F']:
            recommendations.append("⚠️ Student may need intervention.")

            if hours_studied < 5:
                recommendations.append("📚 Study hours are very low - recommend study plan.")
            if previous_scores < 60:
                recommendations.append("📝 Previous performance concerning - consider tutoring.")
            if sleep_hours < 6:
                recommendations.append("😴 Sleep deprivation may be affecting performance.")
            if sample_papers < 2:
                recommendations.append("📋 More practice tests recommended.")

        elif grade == 'C':
            recommendations.append("📊 Average performance - room for improvement.")
            if hours_studied < 7:
                recommendations.append("📚 Increasing study hours could help.")
            if sample_papers < 3:
                recommendations.append("📋 More practice papers recommended.")

        elif grade == 'B':
            recommendations.append("👍 Good performance.")
            if hours_studied < 8 or sample_papers < 4:
                recommendations.append("📈 Small improvements could push to A grade.")

        else:  # A
            recommendations.append("🌟 Excellent! Student is performing very well.")

        if confidence < 0.4:
            recommendations.append("⚡ Low confidence - consider additional assessment.")

        return " ".join(recommendations)

    def predict_batch(self, df: pd.DataFrame) -> pd.DataFrame:
        """Make predictions for multiple students.

        Returns ``df`` (index reset) with ``predicted_grade``,
        ``confidence`` and ``confidence_level`` columns appended; rows that
        fail validation get None in all three.
        """

        results = []
        for _, row in df.iterrows():
            result = self.predict(
                row['Hours Studied'],
                row['Previous Scores'],
                row['Sleep Hours'],
                row['Sample Question Papers Practiced'],
                row['Extracurricular Activities']
            )
            results.append({
                'predicted_grade': result.get('predicted_grade'),
                'confidence': result.get('confidence'),
                'confidence_level': result.get('confidence_level')
            })

        return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)], axis=1)

    def save(self, directory: str = 'model_artifacts'):
        """Save all model artifacts."""
        path = Path(directory)
        # parents=True so a nested target such as 'artifacts/v1' is created.
        path.mkdir(parents=True, exist_ok=True)

        joblib.dump(self.model, path / 'model.pkl')
        joblib.dump(self.preprocessor, path / 'preprocessor.pkl')
        joblib.dump(self.valid_ranges, path / 'valid_ranges.pkl')

        print(f"✅ Model artifacts saved to '{directory}/'")

    @classmethod
    def load(cls, directory: str = 'model_artifacts'):
        """Load model artifacts.

        NOTE(review): joblib files are pickle-based, so loading them can run
        arbitrary code - only load artifacts from a trusted directory.
        """
        path = Path(directory)

        model = joblib.load(path / 'model.pkl')
        preprocessor = joblib.load(path / 'preprocessor.pkl')

        predictor = cls(model, preprocessor)
        predictor.valid_ranges = joblib.load(path / 'valid_ranges.pkl')

        print(f"✅ Model loaded from '{directory}/'")
        return predictor
1000
+
1001
+
1002
+ # Initialize and save predictor
1003
+ predictor = StudentGradePredictor(best_model, preprocessor)
1004
+ predictor.save('model_artifacts')
1005
+
1006
+
1007
+ # =============================================================================
1008
+ # 14. INTERACTIVE DEMONSTRATION
1009
+ # =============================================================================
1010
+
1011
def display_prediction_report(result: dict):
    """Print a human-readable report for one ``predict`` result.

    A failed prediction prints only its error message. A successful one
    prints the inputs, the predicted grade with confidence, a text bar
    chart of the A-F probabilities and the recommendation.
    """

    if not result['success']:
        print(f"\n❌ PREDICTION FAILED: {result['error']}")
        return

    rule = "=" * 60
    print("\n" + rule)
    print(" 🎓 STUDENT PERFORMANCE PREDICTION REPORT")
    print(rule)

    inp = result['input_summary']
    print(f"\n📋 INPUT PARAMETERS:")
    print(f" • Hours Studied: {inp['hours_studied']:>6} h")
    print(f" • Previous Scores: {inp['previous_scores']:>6}")
    print(f" • Sleep Hours: {inp['sleep_hours']:>6} h/day")
    print(f" • Practice Papers: {inp['sample_papers']:>6}")
    print(f" • Extracurricular: {inp['extracurricular']:>6}")

    print(f"\n🎯 PREDICTION:")
    print(f" • Predicted Grade: {result['predicted_grade']}")
    print(f" • Confidence: {result['confidence']:.1f}% ({result['confidence_level']})")

    print(f"\n📊 PROBABILITY DISTRIBUTION:")
    distribution = result['probability_distribution']
    for grade in ('A', 'B', 'C', 'D', 'F'):
        prob = distribution.get(grade, 0)
        # One block per 5 percentage points, so at most 20 blocks.
        bar = "█" * int(prob / 5)
        print(f" {grade}: {bar:<20} {prob:>5.1f}%")

    print(f"\n💡 RECOMMENDATION:")
    print(f" {result['recommendation']}")

    print(rule)
1045
+
1046
demo_banner = "🧪 " * 20
print("\n" + demo_banner)
print(" INTERACTIVE PREDICTION DEMONSTRATIONS")
print(demo_banner)

# Keyword arguments for predictor.predict(), one dict per demonstration.
demo_cases = [
    # Test Case 1: High-performing student
    dict(hours_studied=9, previous_scores=95, sleep_hours=8,
         sample_papers=5, extracurricular='Yes'),
    # Test Case 2: Struggling student
    dict(hours_studied=2, previous_scores=45, sleep_hours=5,
         sample_papers=0, extracurricular='No'),
    # Test Case 3: Average student
    dict(hours_studied=5, previous_scores=70, sleep_hours=7,
         sample_papers=2, extracurricular='Yes'),
    # Test Case 4: Edge case - high previous scores but low effort
    dict(hours_studied=1, previous_scores=85, sleep_hours=6,
         sample_papers=1, extracurricular='No'),
    # Test Case 5: Invalid input
    dict(hours_studied=-5, previous_scores=150, sleep_hours=30,
         sample_papers=0, extracurricular='Maybe'),
]

# Predict and report one case at a time, in the order listed above.
demo_results = []
for case in demo_cases:
    outcome = predictor.predict(**case)
    display_prediction_report(outcome)
    demo_results.append(outcome)

# Keep the individual result names available at module level.
result1, result2, result3, result4, result5 = demo_results
1099
+
1100
+
grade_multiclass/target_distribution.png ADDED
lr_attendance/2018-2019_Daily_Attendance_20240429.csv ADDED
The diff for this file is too large to render. See raw diff
 
lr_attendance/add_weather_features.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import requests
4
+ from datetime import datetime
5
+ import time
6
+
7
# Reference point for every weather query: Central Park, NYC.
NYC_LAT = 40.7789
NYC_LON = -73.9692

# Attendance table written by the feature-engineering script.
df = pd.read_csv("attendance_with_features.csv")

# Rebuild the datetime column from the raw YYYYMMDD "Date" field.
df["date"] = pd.to_datetime(df["Date"], format="%Y%m%d")

# Distinct calendar days present in the data, oldest first.
unique_dates = sorted(set(df["date"].dt.date))

progress_message = (
    f"Fetching weather data for {len(unique_dates)} unique dates from {unique_dates[0]} to {unique_dates[-1]}"
)
print(progress_message)
22
+
23
+
24
def fetch_weather_data(start_date, end_date, timeout=30):
    """Fetch daily weather from the Open-Meteo archive API.

    The request is made for the module-level ``NYC_LAT`` / ``NYC_LON``
    coordinates, in the ``America/New_York`` timezone, using Celsius, km/h
    and millimetres.

    Args:
        start_date: First day of the requested range as a ``YYYY-MM-DD`` string.
        end_date: Last day of the requested range as a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        A DataFrame with a ``date`` column (``datetime.date`` values) followed
        by one column per requested weather variable, or ``None`` when the
        request fails or the response does not have the expected layout.
    """
    url = "https://archive-api.open-meteo.com/v1/archive"

    # Open-Meteo variable name -> column name used by the rest of the script.
    # Keeping both in one mapping guarantees the request and the resulting
    # frame can never get out of sync.
    daily_columns = {
        "temperature_2m_max": "temp_max",
        "temperature_2m_min": "temp_min",
        "temperature_2m_mean": "temp_mean",
        "precipitation_sum": "precipitation_total",
        "rain_sum": "rain_total",
        "snowfall_sum": "snow_total",
        "precipitation_hours": "precipitation_hours",
        "wind_speed_10m_max": "wind_speed_max",
        "wind_gusts_10m_max": "wind_gust_max",
        "weather_code": "weather_code",
        "sunshine_duration": "sunshine_duration",
        "daylight_duration": "daylight_duration",
    }

    params = {
        "latitude": NYC_LAT,
        "longitude": NYC_LON,
        "start_date": start_date,
        "end_date": end_date,
        "daily": list(daily_columns),
        "timezone": "America/New_York",
        "temperature_unit": "celsius",
        "wind_speed_unit": "kmh",
        "precipitation_unit": "mm",
    }

    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        daily = response.json()["daily"]

        # Convert to DataFrame: the date column first, then the weather
        # variables in the same order they were requested.
        columns = {"date": pd.to_datetime(daily["time"]).date}
        for api_name, column_name in daily_columns.items():
            columns[column_name] = daily[api_name]
        return pd.DataFrame(columns)

    # Network/HTTP/JSON problems (RequestException) and malformed payloads
    # (missing keys, wrong types, ragged columns) are reported and turned into
    # None so the caller can skip this chunk; genuine programming errors are
    # no longer swallowed.
    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
        print(f"Error fetching weather data: {e}")
        return None
82
+
83
+
84
# Request the weather in slices of the date list so that a single call never
# asks the API for too much at once.
weather_data = []
chunk_size = 365  # days per request

for offset in range(0, len(unique_dates), chunk_size):
    window = unique_dates[offset : offset + chunk_size]
    first_day, last_day = window[0], window[-1]
    start_date = first_day.strftime("%Y-%m-%d")
    end_date = last_day.strftime("%Y-%m-%d")

    print(f"Fetching weather for {start_date} to {end_date}...")

    chunk_weather = fetch_weather_data(start_date, end_date)
    # A failed request yields None and is simply left out of the result.
    if chunk_weather is not None:
        weather_data.append(chunk_weather)

    # Pause between requests (rate limiting).
    time.sleep(1)
101
+
102
# Combine the fetched chunks, join them onto the attendance rows and derive
# the weather indicator columns. Nothing is written when no chunk succeeded.
if not weather_data:
    print("Failed to fetch weather data")
else:
    weather_df = pd.concat(weather_data, ignore_index=True)
    print(f"Successfully fetched weather data for {len(weather_df)} days")

    # Persist the raw weather table before any helper column is added to it.
    weather_df.to_csv("nyc_weather_2018_2019.csv", index=False)
    print("Weather data saved as 'nyc_weather_2018_2019.csv'")

    # Both frames get a plain datetime.date join key.
    df["date_key"] = df["date"].dt.date
    weather_df["date_key"] = weather_df["date"]

    # Left join: every attendance row is kept, weather columns are attached
    # where a matching day exists.
    weather_columns = weather_df.drop(columns="date")
    attendance_with_weather = df.merge(weather_columns, on="date_key", how="left")

    # Shorthand handles on the raw measurements used below.
    temp_high = attendance_with_weather["temp_max"]
    temp_low = attendance_with_weather["temp_min"]
    precipitation = attendance_with_weather["precipitation_total"]
    snowfall = attendance_with_weather["snow_total"]
    wind = attendance_with_weather["wind_speed_max"]

    # Derived weather features.
    attendance_with_weather["temp_range"] = temp_high - temp_low
    attendance_with_weather["is_rainy_day"] = (precipitation > 2.0).astype(int)
    attendance_with_weather["is_snowy_day"] = (snowfall > 0.5).astype(int)
    attendance_with_weather["is_windy_day"] = (wind > 20.0).astype(int)
    hot_or_cold = (temp_high > 32) | (temp_low < -5)
    attendance_with_weather["is_extreme_temp"] = hot_or_cold.astype(int)

    # Weather severity score (0-1, higher = worse conditions): scaled rain,
    # snow and wind contributions summed and clipped to the unit interval.
    severity = precipitation / 50 + snowfall / 20 + wind / 50
    attendance_with_weather["weather_severity"] = severity.clip(0, 1)

    print("\nWeather features added:")
    known_weather_columns = {
        "temp_max",
        "temp_min",
        "temp_mean",
        "temp_range",
        "precipitation_total",
        "rain_total",
        "snow_total",
        "precipitation_hours",
        "wind_speed_max",
        "wind_gust_max",
        "weather_code",
        "sunshine_duration",
        "daylight_duration",
        "is_rainy_day",
        "is_snowy_day",
        "is_windy_day",
        "is_extreme_temp",
        "weather_severity",
    }
    # Listed in the order the columns appear in the merged frame.
    weather_features = [
        col for col in attendance_with_weather.columns if col in known_weather_columns
    ]
    for feature in weather_features:
        print(f"- {feature}")

    # Save final dataset
    attendance_with_weather.to_csv("attendance_features_complete.csv", index=False)
    print(
        f"\nFinal dataset with {len(attendance_with_weather.columns)} total features saved as 'attendance_features_complete.csv'"
    )

    print("\nSample of weather-related features:")
    preview_columns = [
        "Date",
        "attendance_rate",
        "temp_mean",
        "precipitation_total",
        "is_rainy_day",
        "weather_severity",
    ]
    print(attendance_with_weather[preview_columns].head(10))
lr_attendance/best_model_coefficients.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Feature,Coefficient
2
+ school_avg_attendance,7.137911799297406
3
+ temp_mean,1.4284187566515165
4
+ day_of_week,-0.9758038207498889
5
+ precipitation_hours,-0.7556588622303482
6
+ is_holiday,-0.6112820331872162
7
+ is_snowy_day,-0.6059281530329762
8
+ is_monday,-0.5594793735265182
9
+ school_std_attendance,0.5048872431769107
10
+ is_rainy_day,0.1904241431075877
11
+ days_to_next_holiday,0.13888911829632
12
+ school_year_progress,0.09479814368925972
13
+ is_friday,-0.016717455812732072
lr_attendance/explore_data.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datetime import datetime, timedelta
4
+ import holidays
5
+
6
# Read the raw daily attendance export.
df = pd.read_csv("2018-2019_Daily_Attendance_20240429.csv")

# Headline facts about the table.
earliest, latest = df["Date"].min(), df["Date"].max()
school_count = df["School DBN"].nunique()
print("Dataset Info:")
print(f"Total records: {len(df)}")
print(f"Date range: {earliest} to {latest}")
print(f"Unique schools: {school_count}")
print("\nColumns:", list(df.columns))

# Per-column count of missing cells.
print("\nMissing values:")
print(df.isna().sum())

# Attendance rate as a percentage of enrolled students.
df["attendance_rate"] = df["Present"].div(df["Enrolled"]).mul(100)

# Summary statistics of the new column.
print("\nAttendance Rate Statistics:")
print(df["attendance_rate"].describe())

# First rows, including the freshly added attendance_rate column.
print("\nSample data:")
print(df.head())
lr_attendance/feature_engineering.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datetime import datetime
4
+ import holidays
5
+
6
# Read the raw daily attendance export.
df = pd.read_csv("2018-2019_Daily_Attendance_20240429.csv")

# Parse the YYYYMMDD "Date" field into a proper datetime column.
df["date"] = pd.to_datetime(df["Date"], format="%Y%m%d")

# Target variable: percentage of enrolled students who were present.
df["attendance_rate"] = df["Present"].div(df["Enrolled"]).mul(100)

# Calendar-derived features, all read through the same datetime accessor.
calendar = df["date"].dt
df["day_of_week"] = calendar.dayofweek  # 0=Monday, 6=Sunday
df["day_of_week_name"] = calendar.day_name()
df["month"] = calendar.month
df["month_name"] = calendar.month_name()
df["quarter"] = calendar.quarter
df["week_of_year"] = calendar.isocalendar().week
df["day_of_month"] = calendar.day
df["day_of_year"] = calendar.dayofyear
24
+
25
+
26
+ # Season mapping
27
def get_season(month):
    """Return the season name for a month number.

    December-February is "Winter", March-May is "Spring", June-August is
    "Summer"; every other value (including anything outside 1-12) falls
    through to "Fall".
    """
    season_months = (
        ("Winter", (12, 1, 2)),
        ("Spring", (3, 4, 5)),
        ("Summer", (6, 7, 8)),
    )
    for season, months in season_months:
        if month in months:
            return season
    return "Fall"
36
+
37
+
38
# Season label for every row, derived from the month number.
df["season"] = df["month"].map(get_season)

# Saturday/Sunday flag and its complement (Mon-Fri assumed to be school days).
df["is_weekend"] = df["day_of_week"].ge(5).astype(int)
df["is_school_day"] = df["day_of_week"].lt(5).astype(int)

# Days on which NYC public schools were closed during the 2018-2019 school year.
nyc_holidays_2018_19 = [
    # Fall 2018
    "2018-09-10",  # Rosh Hashanah (Observed)
    "2018-09-11",  # Rosh Hashanah (Observed)
    "2018-09-19",  # Yom Kippur
    "2018-10-08",  # Columbus Day
    "2018-11-06",  # Election Day
    "2018-11-12",  # Veterans Day
    "2018-11-22",  # Thanksgiving Day
    "2018-11-23",  # Thanksgiving Recess
    # Winter recess
    "2018-12-24",  # Winter Recess
    "2018-12-25",  # Christmas Day
    "2018-12-26",  # Winter Recess
    "2018-12-27",  # Winter Recess
    "2018-12-28",  # Winter Recess
    "2018-12-31",  # Winter Recess
    "2019-01-01",  # New Year's Day
    "2019-01-02",  # Winter Recess
    "2019-01-21",  # Dr. Martin Luther King Jr. Day
    # Midwinter recess
    "2019-02-18",  # Midwinter Recess
    "2019-02-19",  # Midwinter Recess
    "2019-02-20",  # Midwinter Recess
    "2019-02-21",  # Midwinter Recess
    "2019-02-22",  # Midwinter Recess
    # Spring recess
    "2019-04-15",  # Spring Recess
    "2019-04-16",  # Spring Recess
    "2019-04-17",  # Spring Recess
    "2019-04-18",  # Spring Recess
    "2019-04-19",  # Spring Recess
    "2019-04-22",  # Spring Recess
    "2019-04-23",  # Spring Recess
    "2019-04-24",  # Spring Recess
    "2019-04-25",  # Spring Recess
    # Late spring
    "2019-05-27",  # Memorial Day
    "2019-06-06",  # Chancellor's Conference Day
    "2019-06-11",  # Anniversary Day
]

# Parsed once so the dates can be compared against the "date" column.
holiday_dates = pd.to_datetime(nyc_holidays_2018_19)

# 1 when the row's date is one of the listed holidays, else 0.
holiday_timestamps = list(holiday_dates)
df["is_holiday"] = df["date"].isin(holiday_timestamps).astype(int)
89
+
90
# Add proximity to holiday features.
#
# The distance to the nearest holiday depends only on the calendar date, and
# the table holds many rows (one per school) for each date. The gaps are
# therefore computed once per distinct date and broadcast to the rows with
# Series.map, instead of walking the whole frame with iterrows() and writing
# two scalars per row through df.loc.
days_to_next_by_date = {}
days_since_last_by_date = {}

for current_date in pd.DatetimeIndex(df["date"].dropna().unique()):
    # Strictly later / strictly earlier holidays; a holiday itself is measured
    # against its neighbours, not against itself.
    future_holidays = holiday_dates[holiday_dates > current_date]
    past_holidays = holiday_dates[holiday_dates < current_date]

    # 0 is the fallback when there is no holiday on that side of the date.
    days_to_next_by_date[current_date] = (
        (future_holidays.min() - current_date).days if len(future_holidays) > 0 else 0
    )
    days_since_last_by_date[current_date] = (
        (current_date - past_holidays.max()).days if len(past_holidays) > 0 else 0
    )

# Rows without a usable date keep the default of 0.
df["days_to_next_holiday"] = (
    df["date"].map(days_to_next_by_date).fillna(0).astype("int64")
)
df["days_since_last_holiday"] = (
    df["date"].map(days_since_last_by_date).fillna(0).astype("int64")
)
110
+
111
# Calendar positions that might affect attendance.
df["is_month_start"] = df["day_of_month"].le(3).astype(int)
df["is_month_end"] = df["day_of_month"].ge(28).astype(int)
df["is_friday"] = df["day_of_week"].eq(4).astype(int)
df["is_monday"] = df["day_of_week"].eq(0).astype(int)

# Fraction of the school year elapsed on each date, clipped to [0, 1].
school_year_start = pd.to_datetime("2018-09-04")
school_year_end = pd.to_datetime("2019-06-26")
elapsed_days = (df["date"] - school_year_start).dt.days
school_year_length = (school_year_end - school_year_start).days
df["school_year_progress"] = (elapsed_days / school_year_length).clip(0, 1)

print("Feature Engineering Complete!")
print(f"Total features created: {len(df.columns)}")
print("\nNew features added:")
# Everything that is not one of the raw input columns counts as "new".
raw_columns = ("School DBN", "Date", "Enrolled", "Absent", "Present", "Released")
new_features = [col for col in df.columns if col not in raw_columns]
for feature in new_features:
    print(f"- {feature}")

print("\nSample of engineered features:")
preview_columns = [
    "Date",
    "attendance_rate",
    "day_of_week_name",
    "month_name",
    "season",
    "is_holiday",
    "days_to_next_holiday",
    "is_friday",
]
print(df[preview_columns].head(10))

# Write the enriched table for the next pipeline step.
df.to_csv("attendance_with_features.csv", index=False)
print("\nDataset saved as 'attendance_with_features.csv'")
lr_attendance/feature_info.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "final_features": [
3
+ "day_of_week",
4
+ "month",
5
+ "quarter",
6
+ "week_of_year",
7
+ "day_of_month",
8
+ "day_of_year",
9
+ "is_weekend",
10
+ "is_school_day",
11
+ "is_month_start",
12
+ "is_month_end",
13
+ "is_friday",
14
+ "is_monday",
15
+ "school_year_progress",
16
+ "is_holiday",
17
+ "days_to_next_holiday",
18
+ "days_since_last_holiday",
19
+ "temp_max",
20
+ "temp_min",
21
+ "temp_mean",
22
+ "temp_range",
23
+ "precipitation_total",
24
+ "rain_total",
25
+ "snow_total",
26
+ "precipitation_hours",
27
+ "wind_speed_max",
28
+ "wind_gust_max",
29
+ "sunshine_duration",
30
+ "daylight_duration",
31
+ "is_rainy_day",
32
+ "is_snowy_day",
33
+ "is_windy_day",
34
+ "is_extreme_temp",
35
+ "weather_severity",
36
+ "temp_humidity_interaction",
37
+ "wind_precip_interaction",
38
+ "holiday_weather_interaction",
39
+ "temp_squared",
40
+ "precipitation_squared",
41
+ "season_encoded"
42
+ ],
43
+ "temporal_features": [
44
+ "day_of_week",
45
+ "month",
46
+ "quarter",
47
+ "week_of_year",
48
+ "day_of_month",
49
+ "day_of_year",
50
+ "is_weekend",
51
+ "is_school_day",
52
+ "is_month_start",
53
+ "is_month_end",
54
+ "is_friday",
55
+ "is_monday",
56
+ "school_year_progress"
57
+ ],
58
+ "holiday_features": [
59
+ "is_holiday",
60
+ "days_to_next_holiday",
61
+ "days_since_last_holiday"
62
+ ],
63
+ "weather_features": [
64
+ "temp_max",
65
+ "temp_min",
66
+ "temp_mean",
67
+ "temp_range",
68
+ "precipitation_total",
69
+ "rain_total",
70
+ "snow_total",
71
+ "precipitation_hours",
72
+ "wind_speed_max",
73
+ "wind_gust_max",
74
+ "sunshine_duration",
75
+ "daylight_duration",
76
+ "is_rainy_day",
77
+ "is_snowy_day",
78
+ "is_windy_day",
79
+ "is_extreme_temp",
80
+ "weather_severity"
81
+ ],
82
+ "target_correlations": {
83
+ "attendance_rate": 1.0,
84
+ "school_year_progress": 0.15298705946434624,
85
+ "quarter": 0.09570330953211145,
86
+ "daylight_duration": 0.08974924827192297,
87
+ "is_snowy_day": 0.0851888567499552,
88
+ "days_to_next_holiday": 0.08037939994225182,
89
+ "month": 0.07982450259808478,
90
+ "week_of_year": 0.07477659348367248,
91
+ "day_of_year": 0.07443173468815607,
92
+ "day_of_month": 0.06875319490990786,
93
+ "snow_total": 0.06750581579691282,
94
+ "is_friday": 0.0642459731182472,
95
+ "is_holiday": 0.05859963560671229,
96
+ "precipitation_hours": 0.05855321374409788,
97
+ "is_rainy_day": 0.054753317922505866,
98
+ "precipitation_total": 0.0546754489544741,
99
+ "is_extreme_temp": 0.04877986561324521,
100
+ "day_of_week": 0.04745618753325818,
101
+ "rain_total": 0.04531486647524365,
102
+ "is_month_end": 0.03061269671737044,
103
+ "days_since_last_holiday": 0.029162742921903476,
104
+ "weather_severity": 0.027970448352187427,
105
+ "is_month_start": 0.023797389694042725,
106
+ "temp_mean": 0.013792886147146688,
107
+ "temp_min": 0.013129987102531162,
108
+ "temp_max": 0.012837652673607865,
109
+ "wind_gust_max": 0.010101066578409116,
110
+ "is_monday": 0.009588967551783255,
111
+ "sunshine_duration": 0.003997196985383076,
112
+ "wind_speed_max": 0.0018488325630909438,
113
+ "temp_range": 0.0014876996432690002,
114
+ "is_windy_day": 0.0009326180610937769,
115
+ "is_weekend": NaN,
116
+ "is_school_day": NaN
117
+ }
118
+ }
lr_attendance/final_coefficients.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Feature,Coefficient
2
+ school_avg_attendance,7.171348715713736
3
+ school_year_progress,-1.5605297921826944
4
+ days_to_next_holiday,0.7496704390789359
5
+ is_snowy_day,-0.66548297262725
6
+ precipitation_hours,-0.4631543689630163
7
+ is_friday,-0.3230532695248619
8
+ day_of_week,-0.3176427595205633
9
+ temp_mean,0.2923542560616456
10
+ is_holiday,-0.24976999079401288
11
+ is_monday,-0.21064021730157015
12
+ is_rainy_day,0.037596682642466635
13
+ school_std_attendance,-0.0019548833102194414
lr_attendance/final_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
lr_attendance/improved_predictions.csv ADDED
The diff for this file is too large to render. See raw diff
 
lr_attendance/model_comparison.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,R²,RMSE,MAE
2
+ A: Baseline (no school),-0.19632912335295583,13.229195648314445,7.397234417574075
3
+ B: With School Avg ⭐,0.23948647822468483,10.547792357860521,5.784459632103281
4
+ C: Predict Deviation,0.256636459570448,10.428185068430691,5.835936867686833
5
+ D: Log Transform,0.238213937586648,10.556613295212724,5.9642873221393735
lr_attendance/model_summary.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ r2_train,r2_test,rmse_test,mae_test,cv_r2_mean,cv_r2_std,n_train,n_test,n_features
2
+ 0.6090720669046676,0.609192651642787,5.943806665659979,3.1503211971515586,0.6089838172103861,0.004961794857370117,221720,55431,12
lr_attendance/nyc_weather_2018_2019.csv ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ date,temp_max,temp_min,temp_mean,precipitation_total,rain_total,snow_total,precipitation_hours,wind_speed_max,wind_gust_max,weather_code,sunshine_duration,daylight_duration
2
+ 2018-09-04,31.2,23.9,27.5,0.0,0.0,0.0,0.0,10.5,21.2,3,43013.66,46669.32
3
+ 2018-09-05,29.4,23.2,26.2,1.1,1.1,0.0,4.0,17.9,34.9,53,42916.46,46511.89
4
+ 2018-09-06,32.3,23.6,26.8,4.6,4.6,0.0,7.0,17.6,38.5,61,33312.05,46353.66
5
+ 2018-09-07,24.3,20.7,22.8,4.9,4.9,0.0,16.0,16.1,31.3,53,1282.37,46194.73
6
+ 2018-09-08,21.1,15.6,18.8,6.9,6.9,0.0,12.0,18.4,36.7,55,2515.72,46035.2
7
+ 2018-09-09,17.2,13.7,15.3,17.9,17.9,0.0,24.0,18.8,37.4,61,0.0,45875.18
8
+ 2018-09-10,17.5,13.6,16.0,30.8,30.8,0.0,24.0,28.8,56.5,63,0.0,45714.75
9
+ 2018-09-11,24.4,17.3,20.9,14.7,14.7,0.0,11.0,17.0,36.0,63,4967.27,45554.02
10
+ 2018-09-12,25.5,20.6,22.8,17.3,17.3,0.0,16.0,10.8,23.4,63,3325.69,45393.1
11
+ 2018-09-13,24.3,20.7,22.2,29.5,29.5,0.0,9.0,17.4,34.2,65,24258.51,45232.07
12
+ 2018-09-14,23.9,20.2,21.6,0.4,0.4,0.0,4.0,15.8,32.0,51,3740.55,45071.04
13
+ 2018-09-15,24.9,18.3,21.4,0.3,0.3,0.0,3.0,10.1,19.4,51,32685.79,44910.09
14
+ 2018-09-16,27.4,18.1,21.7,0.0,0.0,0.0,0.0,14.1,28.4,3,34611.62,44749.33
15
+ 2018-09-17,24.7,18.1,22.0,4.8,4.8,0.0,14.0,18.9,39.2,53,6915.09,44588.84
16
+ 2018-09-18,25.1,20.4,22.7,20.5,20.5,0.0,14.0,21.6,38.5,63,9241.86,44428.73
17
+ 2018-09-19,26.6,19.4,22.4,1.7,1.7,0.0,6.0,16.6,33.5,53,39495.63,44269.07
18
+ 2018-09-20,20.8,17.8,19.4,3.2,3.2,0.0,12.0,10.9,23.8,53,11825.79,44109.98
19
+ 2018-09-21,23.0,19.4,20.8,0.0,0.0,0.0,0.0,23.2,44.3,3,2846.96,43951.14
20
+ 2018-09-22,22.2,16.0,20.0,2.4,2.4,0.0,3.0,23.1,46.1,61,35033.01,43791.55
21
+ 2018-09-23,18.8,14.8,16.6,1.9,1.9,0.0,13.0,11.9,24.1,51,0.0,43631.24
22
+ 2018-09-24,20.1,14.1,16.8,0.1,0.1,0.0,1.0,23.3,48.2,51,24689.51,43470.31
23
+ 2018-09-25,21.8,14.3,18.9,61.4,61.4,0.0,22.0,28.1,56.2,65,0.0,43308.86
24
+ 2018-09-26,26.3,20.6,23.1,10.9,10.9,0.0,11.0,22.5,47.2,63,35389.68,43147.0
25
+ 2018-09-27,21.8,15.0,18.4,5.7,5.7,0.0,6.0,22.2,39.6,61,32400.0,42984.82
26
+ 2018-09-28,19.5,13.9,16.4,29.3,29.3,0.0,9.0,27.3,63.4,63,19786.42,42822.43
27
+ 2018-09-29,22.2,13.7,17.5,0.0,0.0,0.0,0.0,11.8,25.2,3,38584.73,42659.93
28
+ 2018-09-30,20.2,12.2,16.3,0.0,0.0,0.0,0.0,13.6,26.3,3,38508.36,42497.42
29
+ 2018-10-01,25.0,15.5,19.9,0.9,0.9,0.0,3.0,19.2,38.9,51,26480.82,42335.0
30
+ 2018-10-02,24.8,17.1,20.7,32.9,32.9,0.0,11.0,16.2,34.9,65,22627.44,42172.78
31
+ 2018-10-03,24.4,16.4,20.4,0.0,0.0,0.0,0.0,18.2,33.8,3,37493.02,42010.86
32
+ 2018-10-04,25.8,15.2,20.5,1.8,1.8,0.0,3.0,18.9,38.2,61,34062.29,41849.33
33
+ 2018-10-05,20.1,13.9,17.2,0.0,0.0,0.0,0.0,21.5,40.0,3,38192.4,41688.32
34
+ 2018-10-06,19.8,17.5,18.6,0.6,0.6,0.0,4.0,19.1,36.0,51,287.69,41527.91
35
+ 2018-10-07,26.5,19.3,22.3,0.1,0.1,0.0,1.0,16.1,29.9,51,10800.0,41368.22
36
+ 2018-10-08,19.8,17.7,18.8,0.8,0.8,0.0,5.0,17.8,34.2,51,0.0,41209.36
37
+ 2018-10-09,25.0,19.2,21.6,0.5,0.5,0.0,5.0,15.3,29.2,51,20834.79,41051.43
38
+ 2018-10-10,25.5,20.5,22.2,0.6,0.6,0.0,6.0,16.9,32.4,51,27016.84,40894.55
39
+ 2018-10-11,25.1,20.8,22.4,14.1,14.1,0.0,15.0,30.3,55.8,61,5102.43,40738.38
40
+ 2018-10-12,21.0,10.8,15.2,6.5,6.5,0.0,5.0,32.9,60.8,63,32103.53,40581.84
41
+ 2018-10-13,13.6,8.6,10.8,3.8,3.8,0.0,8.0,21.7,41.8,53,11341.76,40424.94
42
+ 2018-10-14,14.6,6.8,10.9,0.0,0.0,0.0,0.0,14.6,29.5,3,23733.44,40267.82
43
+ 2018-10-15,20.0,9.5,15.8,5.3,5.3,0.0,12.0,27.2,55.1,61,1142.64,40110.58
44
+ 2018-10-16,15.9,8.1,11.1,0.5,0.5,0.0,2.0,29.0,52.2,51,33615.7,39953.36
45
+ 2018-10-17,16.3,7.6,11.2,0.0,0.0,0.0,0.0,30.4,56.2,3,35979.66,39796.27
46
+ 2018-10-18,9.6,4.7,7.0,0.0,0.0,0.0,0.0,23.6,46.1,1,36329.9,39639.44
47
+ 2018-10-19,14.6,4.2,9.6,0.3,0.3,0.0,1.0,20.5,39.2,51,34590.27,39482.98
48
+ 2018-10-20,17.1,9.1,13.1,6.1,6.1,0.0,7.0,23.1,47.5,63,27650.76,39327.03
49
+ 2018-10-21,9.2,3.8,7.0,0.3,0.3,0.0,2.0,33.1,63.0,51,27771.21,39171.71
50
+ 2018-10-22,10.6,3.5,7.2,0.0,0.0,0.0,0.0,17.1,35.3,3,31360.17,39017.16
51
+ 2018-10-23,16.7,7.1,11.3,0.1,0.1,0.0,1.0,18.7,38.5,51,32036.72,38863.5
52
+ 2018-10-24,13.1,6.1,9.0,0.0,0.0,0.0,0.0,30.3,56.9,3,34822.13,38710.88
53
+ 2018-10-25,10.4,3.9,6.6,0.0,0.0,0.0,0.0,19.6,39.2,3,34795.7,38559.43
54
+ 2018-10-26,10.1,2.5,6.5,0.2,0.2,0.0,1.0,18.0,32.8,51,32794.06,38409.29
55
+ 2018-10-27,11.5,6.5,9.0,40.5,40.5,0.0,24.0,36.4,72.0,63,0.0,38260.59
56
+ 2018-10-28,11.6,6.8,9.3,1.3,1.3,0.0,5.0,17.2,39.2,53,16467.77,38113.5
57
+ 2018-10-29,13.6,7.2,10.4,2.0,2.0,0.0,7.0,23.3,44.3,53,29238.26,37968.17
58
+ 2018-10-30,12.4,4.2,7.7,0.0,0.0,0.0,0.0,20.7,41.0,0,34033.08,37824.72
59
+ 2018-10-31,17.0,4.4,11.2,0.0,0.0,0.0,0.0,22.5,43.2,3,32941.03,37682.86
60
+ 2018-11-01,20.0,11.0,15.6,0.1,0.1,0.0,1.0,23.2,41.4,51,30837.66,37541.34
61
+ 2018-11-02,20.9,17.2,19.1,10.1,10.1,0.0,19.0,29.4,62.6,61,0.0,37400.25
62
+ 2018-11-03,18.5,7.3,13.3,8.1,8.1,0.0,7.0,33.8,79.9,61,22803.61,37259.71
63
+ 2018-11-04,11.8,4.7,7.8,0.0,0.0,0.0,0.0,11.3,20.5,3,33370.03,37119.88
64
+ 2018-11-05,11.4,7.1,9.8,6.2,6.2,0.0,12.0,22.0,40.3,61,0.0,36980.89
65
+ 2018-11-06,16.6,9.4,12.2,13.6,13.6,0.0,10.0,25.8,76.0,63,0.0,36842.9
66
+ 2018-11-07,16.8,8.2,12.8,0.0,0.0,0.0,0.0,23.1,43.9,3,32248.07,36706.06
67
+ 2018-11-08,12.2,5.2,8.8,0.0,0.0,0.0,0.0,18.2,32.0,3,30385.53,36570.53
68
+ 2018-11-09,10.4,4.0,7.5,14.9,14.9,0.0,9.0,24.3,47.2,63,5898.37,36436.46
69
+ 2018-11-10,9.6,1.1,5.8,0.7,0.7,0.0,4.0,33.4,63.0,51,32483.88,36304.03
70
+ 2018-11-11,7.6,0.1,3.2,0.0,0.0,0.0,0.0,17.4,36.4,0,32247.95,36173.38
71
+ 2018-11-12,8.8,0.2,4.7,1.1,1.1,0.0,2.0,12.6,26.3,55,28714.71,36044.69
72
+ 2018-11-13,10.3,5.7,7.8,18.5,18.5,0.0,12.0,25.3,50.8,63,0.0,35918.14
73
+ 2018-11-14,5.2,-0.2,2.4,0.0,0.0,0.0,0.0,26.2,51.5,3,31707.88,35793.89
74
+ 2018-11-15,3.9,-1.5,0.7,22.6,6.4,11.34,11.0,35.0,61.9,75,0.0,35672.12
75
+ 2018-11-16,6.2,1.8,4.1,9.6,7.5,1.47,10.0,39.1,82.8,75,28800.0,35553.02
76
+ 2018-11-17,8.7,0.8,4.4,0.0,0.0,0.0,0.0,20.2,42.1,3,30982.55,35436.78
77
+ 2018-11-18,5.6,1.1,3.8,0.4,0.2,0.14,2.0,11.3,22.0,71,9379.0,35323.56
78
+ 2018-11-19,11.6,2.7,6.5,0.0,0.0,0.0,0.0,9.5,19.1,3,20971.63,35213.56
79
+ 2018-11-20,8.1,2.2,5.4,0.5,0.5,0.0,1.0,22.7,42.5,53,21543.86,35106.54
80
+ 2018-11-21,7.3,-3.0,2.8,0.2,0.2,0.0,2.0,28.5,55.8,51,31189.29,35001.36
81
+ 2018-11-22,-3.2,-7.7,-5.4,0.0,0.0,0.0,0.0,27.8,53.3,3,31141.97,34898.11
82
+ 2018-11-23,-0.8,-8.4,-4.5,0.0,0.0,0.0,0.0,16.9,34.2,3,31044.16,34796.89
83
+ 2018-11-24,7.9,-2.6,3.2,16.1,16.1,0.0,7.0,20.9,40.3,63,11437.53,34697.86
84
+ 2018-11-25,12.2,4.2,7.9,22.8,22.8,0.0,6.0,33.4,59.8,65,25200.0,34601.13
85
+ 2018-11-26,10.8,3.7,7.4,30.2,30.2,0.0,16.0,35.3,63.7,65,477.6,34506.86
86
+ 2018-11-27,7.0,3.0,4.9,1.8,1.8,0.0,4.0,32.5,61.2,53,12559.81,34415.18
87
+ 2018-11-28,5.2,2.0,3.5,0.0,0.0,0.0,0.0,36.6,68.8,3,17004.59,34326.23
88
+ 2018-11-29,6.4,1.8,4.3,0.0,0.0,0.0,0.0,31.4,59.8,3,29901.48,34240.14
89
+ 2018-11-30,4.8,-0.1,2.8,1.3,1.0,0.21,3.0,9.4,31.7,71,9364.26,34157.08
90
+ 2018-12-01,6.3,-1.0,3.3,3.0,3.0,0.0,6.0,12.6,22.0,53,26881.31,34077.17
91
+ 2018-12-02,14.6,5.2,10.6,16.6,16.6,0.0,21.0,18.0,35.3,63,0.0,34000.57
92
+ 2018-12-03,12.8,6.2,10.1,0.0,0.0,0.0,0.0,26.3,48.6,3,26693.77,33927.42
93
+ 2018-12-04,6.2,-2.0,2.0,0.0,0.0,0.0,0.0,23.0,44.6,3,29895.79,33857.86
94
+ 2018-12-05,2.2,-3.8,-1.1,0.0,0.0,0.0,0.0,13.6,28.1,3,20367.7,33792.06
95
+ 2018-12-06,4.2,-3.0,0.7,0.0,0.0,0.0,0.0,21.4,41.0,3,25925.27,33730.13
96
+ 2018-12-07,2.7,-4.2,0.2,0.0,0.0,0.0,0.0,21.9,43.9,1,29926.36,33672.25
97
+ 2018-12-08,1.9,-4.9,-1.8,0.0,0.0,0.0,0.0,19.8,39.2,3,19941.12,33618.54
98
+ 2018-12-09,1.6,-4.4,-1.4,0.0,0.0,0.0,0.0,12.3,27.4,3,20439.16,33569.16
99
+ 2018-12-10,3.8,-3.5,-0.7,0.0,0.0,0.0,0.0,17.2,36.0,3,29698.45,33524.03
100
+ 2018-12-11,3.7,-3.9,-0.7,0.0,0.0,0.0,0.0,18.7,33.8,1,29673.37,33482.58
101
+ 2018-12-12,5.1,-2.0,1.1,0.0,0.0,0.0,0.0,17.1,34.9,3,24647.14,33444.81
102
+ 2018-12-13,3.8,-0.9,1.6,0.9,0.5,0.28,6.0,19.8,37.1,71,6975.15,33410.76
103
+ 2018-12-14,9.9,2.8,6.3,1.4,1.4,0.0,5.0,15.5,29.9,51,6838.5,33380.45
104
+ 2018-12-15,9.6,5.9,7.6,4.7,4.7,0.0,11.0,25.3,43.2,53,0.0,33353.91
105
+ 2018-12-16,6.7,1.6,3.9,23.1,21.7,0.98,22.0,28.0,51.1,73,0.0,33331.19
106
+ 2018-12-17,7.6,1.8,4.1,0.6,0.6,0.0,3.0,29.3,52.9,51,18524.88,33312.29
107
+ 2018-12-18,2.3,-3.3,-0.4,0.0,0.0,0.0,0.0,29.8,53.6,3,29200.12,33297.25
108
+ 2018-12-19,4.7,-4.6,-0.6,0.0,0.0,0.0,0.0,13.8,26.6,3,29139.29,33286.09
109
+ 2018-12-20,10.6,-2.4,3.4,7.7,7.7,0.0,9.0,18.5,31.3,61,21520.87,33278.84
110
+ 2018-12-21,14.6,10.0,12.4,43.6,43.6,0.0,16.0,38.6,82.8,65,0.0,33275.5
111
+ 2018-12-22,9.9,3.9,6.5,3.3,3.3,0.0,3.0,31.1,60.5,61,18855.64,33276.11
112
+ 2018-12-23,5.8,-0.2,2.7,0.0,0.0,0.0,0.0,22.9,45.0,3,28909.64,33280.67
113
+ 2018-12-24,6.8,0.6,2.9,2.6,0.7,1.33,11.0,19.3,36.7,73,18146.82,33289.2
114
+ 2018-12-25,4.7,-1.4,1.2,0.0,0.0,0.0,0.0,18.6,38.9,3,25764.34,33301.71
115
+ 2018-12-26,5.8,-2.5,1.0,0.0,0.0,0.0,0.0,18.0,34.2,3,16403.13,33318.21
116
+ 2018-12-27,4.1,0.1,1.9,0.0,0.0,0.0,0.0,16.0,28.4,3,28955.64,33338.71
117
+ 2018-12-28,13.4,4.9,10.2,28.5,28.5,0.0,20.0,27.7,62.3,63,0.0,33363.21
118
+ 2018-12-29,12.4,1.9,8.2,0.1,0.1,0.0,1.0,25.8,49.0,51,28989.88,33391.73
119
+ 2018-12-30,5.2,-0.6,1.7,0.3,0.0,0.21,2.0,13.1,24.1,71,12333.29,33424.39
120
+ 2018-12-31,7.9,-1.1,3.4,22.1,22.1,0.0,10.0,24.5,43.6,63,5883.96,33461.48
121
+ 2019-01-01,14.2,3.2,9.4,3.1,3.1,0.0,3.0,38.0,67.7,61,18452.99,33502.93
122
+ 2019-01-02,3.7,-0.6,1.9,0.0,0.0,0.0,0.0,15.8,28.4,3,19357.07,33548.61
123
+ 2019-01-03,7.1,0.3,4.2,0.0,0.0,0.0,0.0,24.2,47.2,3,22127.57,33598.4
124
+ 2019-01-04,8.0,-1.4,2.8,0.0,0.0,0.0,0.0,19.6,37.8,3,24464.69,33652.18
125
+ 2019-01-05,6.7,3.0,5.4,20.6,20.6,0.0,18.0,22.7,42.1,63,0.0,33709.84
126
+ 2019-01-06,8.6,-1.0,3.8,0.2,0.2,0.0,1.0,30.0,56.9,51,28503.95,33771.23
127
+ 2019-01-07,0.2,-5.3,-2.1,0.2,0.1,0.07,2.0,17.2,32.4,71,23996.83,33836.26
128
+ 2019-01-08,8.2,0.4,4.3,0.2,0.2,0.0,2.0,13.4,33.8,51,10552.66,33904.79
129
+ 2019-01-09,7.1,0.3,4.4,3.6,3.6,0.0,4.0,34.6,65.2,61,25488.23,33976.69
130
+ 2019-01-10,0.8,-2.1,-0.4,0.0,0.0,0.0,0.0,36.7,68.4,3,20963.05,34051.85
131
+ 2019-01-11,-1.4,-5.9,-3.9,0.0,0.0,0.0,0.0,26.3,51.5,3,29951.96,34130.14
132
+ 2019-01-12,2.3,-6.4,-2.8,0.0,0.0,0.0,0.0,10.6,20.9,3,29357.16,34211.44
133
+ 2019-01-13,0.3,-4.1,-2.5,0.8,0.0,0.56,6.0,17.3,32.8,71,25903.73,34295.64
134
+ 2019-01-14,1.1,-6.4,-3.7,0.0,0.0,0.0,0.0,20.3,37.1,3,30076.59,34382.6
135
+ 2019-01-15,3.5,-4.8,-1.9,0.0,0.0,0.0,0.0,9.5,23.4,3,29494.34,34472.21
136
+ 2019-01-16,4.6,-4.1,-0.2,0.0,0.0,0.0,0.0,22.4,43.9,3,30002.61,34564.36
137
+ 2019-01-17,-0.2,-5.5,-2.6,0.3,0.0,0.21,1.0,18.2,33.1,73,22266.33,34658.92
138
+ 2019-01-18,3.6,-2.2,0.3,1.4,0.0,0.98,8.0,12.3,21.6,73,13850.21,34755.77
139
+ 2019-01-19,3.2,-3.7,-0.1,5.3,0.6,3.29,4.0,19.7,35.3,75,14658.24,34855.22
140
+ 2019-01-20,8.8,-11.5,-0.7,29.6,27.3,1.61,13.0,31.3,61.2,75,6425.02,34958.25
141
+ 2019-01-21,-11.9,-15.8,-13.4,0.0,0.0,0.0,0.0,33.3,62.6,3,30776.06,35064.75
142
+ 2019-01-22,-2.2,-12.1,-7.3,0.0,0.0,0.0,0.0,19.5,36.0,3,30510.7,35174.51
143
+ 2019-01-23,7.7,-3.5,1.8,0.1,0.1,0.0,1.0,22.9,40.7,51,11909.11,35287.37
144
+ 2019-01-24,12.6,0.1,8.4,34.4,34.4,0.0,18.0,42.9,92.2,63,0.0,35403.12
145
+ 2019-01-25,2.8,-3.8,-1.0,0.0,0.0,0.0,0.0,31.6,59.0,3,26179.88,35521.59
146
+ 2019-01-26,1.0,-6.3,-2.9,0.0,0.0,0.0,0.0,13.0,29.5,3,31924.3,35642.61
147
+ 2019-01-27,9.6,-2.7,3.0,0.0,0.0,0.0,0.0,28.8,55.4,3,28750.15,35765.98
148
+ 2019-01-28,1.3,-5.8,-2.8,0.0,0.0,0.0,0.0,18.9,34.6,3,31577.17,35891.54
149
+ 2019-01-29,4.3,-5.8,-0.2,5.7,5.1,0.42,12.0,22.8,40.3,73,0.0,36019.12
150
+ 2019-01-30,0.3,-14.8,-6.6,0.4,0.0,0.28,3.0,38.4,71.6,71,26884.57,36148.55
151
+ 2019-01-31,-8.4,-16.6,-12.5,0.0,0.0,0.0,0.0,21.3,44.6,0,32793.02,36279.68
152
+ 2019-02-01,-4.8,-12.1,-8.8,0.0,0.0,0.0,0.0,13.2,24.1,3,27911.26,36412.32
153
+ 2019-02-02,2.5,-9.9,-4.2,0.0,0.0,0.0,0.0,22.8,43.9,3,32403.46,36546.34
154
+ 2019-02-03,10.0,-3.5,1.9,0.0,0.0,0.0,0.0,16.6,32.4,2,33226.9,36681.56
155
+ 2019-02-04,11.9,-1.8,3.9,0.0,0.0,0.0,0.0,14.1,30.6,1,33373.6,36817.85
156
+ 2019-02-05,17.6,0.7,7.5,0.0,0.0,0.0,0.0,17.4,29.9,3,31915.86,36955.05
157
+ 2019-02-06,7.1,-1.3,3.0,9.9,9.9,0.0,6.0,17.0,31.0,63,27012.72,37093.02
158
+ 2019-02-07,7.8,2.4,5.0,5.1,5.1,0.0,5.0,15.6,27.0,61,3400.59,37231.61
159
+ 2019-02-08,11.2,-0.9,5.4,9.0,9.0,0.0,12.0,33.3,61.6,61,17612.41,37371.16
160
+ 2019-02-09,0.6,-5.2,-3.0,0.0,0.0,0.0,0.0,31.8,62.3,2,34118.37,37512.93
161
+ 2019-02-10,2.8,-6.2,-2.2,0.0,0.0,0.0,0.0,13.2,28.1,3,34271.85,37656.82
162
+ 2019-02-11,2.8,-2.0,-0.0,0.3,0.0,0.21,3.0,11.6,23.0,71,12777.81,37802.67
163
+ 2019-02-12,0.9,-2.8,-1.3,16.4,10.9,3.85,16.0,26.7,49.3,75,0.0,37950.3
164
+ 2019-02-13,5.7,-1.8,2.0,2.2,2.2,0.0,4.0,34.9,63.7,53,21005.38,38099.58
165
+ 2019-02-14,7.1,-3.0,1.5,0.0,0.0,0.0,0.0,19.6,39.2,3,34822.73,38250.33
166
+ 2019-02-15,13.0,2.8,8.3,0.3,0.3,0.0,3.0,25.2,42.1,51,22442.48,38402.41
167
+ 2019-02-16,5.8,-2.3,1.8,0.0,0.0,0.0,0.0,20.6,45.0,3,34997.9,38555.66
168
+ 2019-02-17,2.3,-5.4,-1.2,1.1,1.1,0.0,3.0,14.8,31.3,53,35076.93,38709.95
169
+ 2019-02-18,6.4,-3.8,0.9,4.0,2.7,0.91,9.0,29.0,55.1,73,10243.74,38865.12
170
+ 2019-02-19,2.4,-5.6,-2.4,0.0,0.0,0.0,0.0,19.5,36.0,3,35339.56,39021.04
171
+ 2019-02-20,-0.8,-4.2,-2.7,12.0,4.1,5.53,13.0,16.3,47.9,75,0.0,39177.56
172
+ 2019-02-21,11.3,-0.1,4.4,3.7,3.7,0.0,4.0,19.8,38.9,61,35080.43,39334.55
173
+ 2019-02-22,7.0,-0.4,3.1,0.0,0.0,0.0,0.0,17.3,34.2,3,27432.9,39491.88
174
+ 2019-02-23,4.6,-2.5,1.1,1.5,1.5,0.0,3.0,11.9,24.8,53,20485.7,39649.42
175
+ 2019-02-24,9.4,2.1,6.4,19.9,19.9,0.0,13.0,35.5,68.8,63,1091.64,39807.03
176
+ 2019-02-25,5.0,-3.0,1.7,0.0,0.0,0.0,0.0,44.6,82.4,3,33542.79,39964.58
177
+ 2019-02-26,3.0,-4.8,-2.1,0.0,0.0,0.0,0.0,25.3,50.8,3,36472.96,40121.95
178
+ 2019-02-27,-1.7,-5.5,-3.7,0.5,0.0,0.35,4.0,18.3,36.7,71,14046.06,40279.02
179
+ 2019-02-28,2.7,-4.2,-1.1,0.3,0.0,0.21,2.0,26.1,51.5,71,34680.77,40436.11
180
+ 2019-03-01,2.8,-3.5,-0.6,2.1,0.0,1.47,6.0,13.3,26.3,73,7248.83,40594.44
181
+ 2019-03-02,3.7,-1.0,1.3,9.5,0.0,6.72,9.0,18.4,33.8,75,25391.91,40753.93
182
+ 2019-03-03,5.7,-0.9,1.9,10.0,0.3,6.79,7.0,14.5,28.1,75,28063.99,40914.49
183
+ 2019-03-04,2.3,-5.7,-0.2,16.1,0.0,11.27,6.0,21.3,38.2,75,31227.24,41075.97
184
+ 2019-03-05,-1.6,-14.6,-6.2,0.0,0.0,0.0,0.0,19.0,34.2,2,37628.05,41238.26
185
+ 2019-03-06,-3.0,-9.8,-6.0,0.0,0.0,0.0,0.0,26.0,51.1,3,29823.71,41401.25
186
+ 2019-03-07,0.6,-9.2,-4.5,0.0,0.0,0.0,0.0,26.3,51.5,3,32797.89,41564.82
187
+ 2019-03-08,2.6,-7.2,-2.0,0.0,0.0,0.0,0.0,14.3,29.5,3,34844.63,41728.84
188
+ 2019-03-09,8.2,-4.1,1.4,0.0,0.0,0.0,0.0,13.0,23.8,3,38169.29,41893.22
189
+ 2019-03-10,7.8,1.1,3.6,14.0,11.1,2.03,10.0,24.2,46.8,75,0.0,42057.83
190
+ 2019-03-11,10.6,2.0,6.3,0.0,0.0,0.0,0.0,26.0,49.7,3,38610.58,42222.57
191
+ 2019-03-12,5.0,-1.5,1.7,0.0,0.0,0.0,0.0,25.0,49.0,3,38898.82,42387.31
192
+ 2019-03-13,6.6,-4.9,1.7,0.0,0.0,0.0,0.0,17.2,33.5,3,33571.29,42551.96
193
+ 2019-03-14,11.1,0.8,6.8,0.0,0.0,0.0,0.0,21.5,40.3,3,38560.82,42716.4
194
+ 2019-03-15,21.8,9.8,15.3,20.4,20.4,0.0,7.0,29.4,53.3,65,17998.09,42880.52
195
+ 2019-03-16,10.3,1.7,6.6,0.0,0.0,0.0,0.0,29.9,57.2,3,37108.74,43044.21
196
+ 2019-03-17,5.9,-2.0,1.7,0.0,0.0,0.0,0.0,20.0,40.0,2,40026.87,43207.36
197
+ 2019-03-18,7.1,-4.1,1.5,0.0,0.0,0.0,0.0,16.9,34.9,3,35062.98,43369.86
198
+ 2019-03-19,8.0,-3.2,2.6,0.0,0.0,0.0,0.0,16.2,33.5,3,32147.55,43531.59
199
+ 2019-03-20,9.9,-1.5,4.1,0.0,0.0,0.0,0.0,24.6,47.2,3,39737.39,43692.88
200
+ 2019-03-21,8.6,3.0,5.9,18.3,18.3,0.0,20.0,25.9,51.8,63,0.0,43854.82
201
+ 2019-03-22,9.3,2.7,5.8,13.2,13.2,0.0,14.0,37.7,68.8,63,13580.49,44017.36
202
+ 2019-03-23,8.4,-0.2,3.7,0.3,0.0,0.21,3.0,34.2,62.6,71,31687.92,44180.42
203
+ 2019-03-24,13.3,-1.4,6.5,0.0,0.0,0.0,0.0,20.8,39.2,3,28146.08,44343.9
204
+ 2019-03-25,9.1,3.0,7.6,1.1,1.1,0.0,5.0,17.6,36.4,51,3276.7,44507.68
205
+ 2019-03-26,7.9,-2.5,2.8,0.0,0.0,0.0,0.0,21.8,42.5,1,41493.85,44671.69
206
+ 2019-03-27,6.4,-2.6,1.9,0.0,0.0,0.0,0.0,15.8,33.1,3,41656.98,44835.8
207
+ 2019-03-28,8.8,-2.5,4.1,0.0,0.0,0.0,0.0,24.9,48.2,3,39910.41,44999.93
208
+ 2019-03-29,12.6,5.0,9.1,0.8,0.8,0.0,5.0,12.2,25.6,51,921.16,45163.97
209
+ 2019-03-30,16.2,6.8,11.4,0.0,0.0,0.0,0.0,29.7,54.7,3,33658.74,45327.82
210
+ 2019-03-31,17.6,3.5,10.2,4.6,4.6,0.0,5.0,26.4,52.9,61,19279.57,45491.37
211
+ 2019-04-01,7.4,-0.9,3.0,0.0,0.0,0.0,0.0,31.3,59.4,3,42467.27,45654.52
212
+ 2019-04-02,8.7,-2.0,4.2,1.0,1.0,0.0,6.0,23.7,47.9,51,28598.75,45817.16
213
+ 2019-04-03,16.5,2.1,9.6,0.5,0.5,0.0,3.0,38.5,71.3,51,42786.98,45979.19
214
+ 2019-04-04,13.4,2.0,7.7,0.0,0.0,0.0,0.0,27.1,53.6,3,42945.48,46140.5
215
+ 2019-04-05,5.3,1.5,3.6,7.3,7.3,0.0,14.0,15.7,31.0,61,1656.07,46300.98
216
+ 2019-04-06,20.0,3.6,10.1,0.2,0.2,0.0,2.0,21.1,41.0,51,39557.13,46460.52
217
+ 2019-04-07,16.1,4.2,10.5,0.0,0.0,0.0,0.0,18.5,38.2,3,36196.37,46619.0
218
+ 2019-04-08,24.2,8.6,15.8,12.6,12.6,0.0,8.0,23.6,57.6,63,24721.92,46776.31
219
+ 2019-04-09,12.2,4.4,8.6,0.9,0.9,0.0,2.0,21.0,50.0,53,36397.97,46932.74
220
+ 2019-04-10,14.2,4.3,9.0,0.0,0.0,0.0,0.0,24.3,48.2,3,43200.0,47089.33
221
+ 2019-04-11,10.4,2.7,6.3,0.0,0.0,0.0,0.0,14.1,28.1,3,36565.94,47246.04
222
+ 2019-04-12,18.6,6.2,12.2,15.2,15.2,0.0,9.0,31.0,56.5,63,22271.96,47402.77
223
+ 2019-04-13,23.0,14.1,17.5,12.6,12.6,0.0,9.0,22.5,46.8,63,22158.37,47559.43
224
+ 2019-04-14,18.5,10.9,15.2,1.9,1.9,0.0,5.0,25.6,43.6,55,22183.88,47715.9
225
+ 2019-04-15,18.5,7.2,13.4,15.0,15.0,0.0,10.0,37.6,81.0,63,29649.22,47872.08
226
+ 2019-04-16,17.0,5.1,10.9,0.3,0.3,0.0,1.0,28.1,54.4,51,43705.63,48027.87
227
+ 2019-04-17,14.9,7.8,10.4,0.4,0.4,0.0,1.0,20.8,38.2,51,36000.0,48183.14
228
+ 2019-04-18,16.6,8.6,12.5,1.7,1.7,0.0,8.0,24.2,47.9,53,14400.0,48337.8
229
+ 2019-04-19,23.6,15.4,18.5,5.4,5.4,0.0,10.0,32.4,58.7,63,32775.27,48491.73
230
+ 2019-04-20,17.7,11.4,15.4,11.8,11.8,0.0,13.0,31.8,73.1,61,360.09,48644.81
231
+ 2019-04-21,16.9,9.9,13.3,0.3,0.3,0.0,2.0,20.9,41.0,51,35473.65,48796.92
232
+ 2019-04-22,17.0,9.5,13.4,8.1,8.1,0.0,15.0,22.9,43.9,61,14815.11,48947.95
233
+ 2019-04-23,22.8,9.8,15.8,0.2,0.2,0.0,2.0,21.9,42.5,51,43296.33,49097.76
234
+ 2019-04-24,19.5,10.1,16.0,0.0,0.0,0.0,0.0,20.9,43.9,3,44698.35,49246.24
235
+ 2019-04-25,16.2,7.5,11.4,1.9,1.9,0.0,6.0,17.0,36.0,53,32403.16,49393.25
236
+ 2019-04-26,16.6,10.2,13.4,18.9,18.9,0.0,21.0,28.9,58.3,61,5904.13,49538.66
237
+ 2019-04-27,12.1,7.4,10.1,3.1,3.1,0.0,4.0,38.2,72.7,61,35685.02,49682.34
238
+ 2019-04-28,10.8,5.8,8.5,1.9,1.9,0.0,6.0,18.9,36.0,55,9822.66,49824.16
239
+ 2019-04-29,13.1,3.1,8.5,0.9,0.9,0.0,2.0,18.3,36.7,53,38090.4,49964.36
240
+ 2019-04-30,18.9,8.6,12.1,3.7,3.7,0.0,6.0,15.0,31.7,55,13472.75,50103.94
241
+ 2019-05-01,12.4,8.8,10.6,1.7,1.7,0.0,6.0,18.8,38.2,53,88.17,50242.86
242
+ 2019-05-02,23.9,10.2,14.7,5.7,5.7,0.0,7.0,19.6,44.3,61,39169.56,50381.0
243
+ 2019-05-03,14.1,9.5,11.5,1.1,1.1,0.0,4.0,12.7,25.6,51,0.0,50518.27
244
+ 2019-05-04,20.6,10.8,14.9,9.6,9.6,0.0,11.0,8.3,19.1,61,19398.42,50654.52
245
+ 2019-05-05,13.3,10.9,12.0,32.3,32.3,0.0,24.0,24.5,44.6,63,0.0,50789.64
246
+ 2019-05-06,20.5,9.4,14.0,0.2,0.2,0.0,2.0,18.5,33.5,51,40686.59,50923.5
247
+ 2019-05-07,19.1,10.2,14.4,4.3,4.3,0.0,5.0,18.9,39.6,61,45192.72,51055.99
248
+ 2019-05-08,19.4,10.1,14.2,0.0,0.0,0.0,0.0,16.8,33.1,3,35886.93,51186.98
249
+ 2019-05-09,15.2,9.4,12.0,0.1,0.1,0.0,1.0,21.4,42.1,51,18233.76,51316.32
250
+ 2019-05-10,22.6,12.1,17.8,2.3,2.3,0.0,6.0,21.4,40.7,53,16369.52,51443.89
251
+ 2019-05-11,19.5,11.2,16.0,3.9,3.9,0.0,5.0,19.3,40.0,61,39146.99,51569.55
252
+ 2019-05-12,12.8,6.8,9.0,28.7,28.7,0.0,23.0,32.6,61.6,63,0.0,51693.18
253
+ 2019-05-13,10.9,6.8,8.5,17.7,17.7,0.0,14.0,20.5,41.0,63,3600.0,51814.6
254
+ 2019-05-14,11.9,6.9,9.0,5.1,5.1,0.0,12.0,16.4,30.2,53,1020.25,51933.69
255
+ 2019-05-15,19.4,6.1,13.0,0.5,0.5,0.0,3.0,22.3,43.6,51,45486.12,52050.3
256
+ 2019-05-16,20.6,9.6,15.7,0.8,0.8,0.0,6.0,15.4,36.4,51,48691.21,52164.29
257
+ 2019-05-17,24.4,11.9,18.8,0.2,0.2,0.0,2.0,17.7,35.6,51,39615.34,52275.49
258
+ 2019-05-18,22.9,11.2,16.7,0.0,0.0,0.0,0.0,16.2,28.8,3,48894.36,52383.75
259
+ 2019-05-19,22.8,12.3,18.2,2.4,2.4,0.0,4.0,25.6,49.0,55,47441.0,52489.26
260
+ 2019-05-20,28.0,18.7,23.1,1.7,1.7,0.0,5.0,26.5,61.9,53,47632.42,52592.85
261
+ 2019-05-21,19.8,12.4,16.0,0.0,0.0,0.0,0.0,26.3,50.4,3,49181.6,52694.46
262
+ 2019-05-22,22.2,10.4,16.7,0.0,0.0,0.0,0.0,16.2,32.4,3,46659.08,52794.0
263
+ 2019-05-23,24.9,14.1,18.8,0.8,0.8,0.0,3.0,23.0,43.9,53,37003.31,52891.37
264
+ 2019-05-24,23.2,14.6,19.6,0.3,0.3,0.0,1.0,32.8,64.1,51,47938.18,52986.44
265
+ 2019-05-25,19.5,12.2,15.8,0.0,0.0,0.0,0.0,26.2,50.4,3,49535.76,53079.13
266
+ 2019-05-26,29.0,15.1,21.8,3.0,3.0,0.0,5.0,20.9,39.6,61,43200.0,53169.33
267
+ 2019-05-27,25.5,15.6,21.0,0.0,0.0,0.0,0.0,15.9,32.4,3,49656.98,53256.91
268
+ 2019-05-28,20.8,15.9,18.4,6.9,6.9,0.0,9.0,30.8,57.6,61,15997.57,53341.79
269
+ 2019-05-29,21.0,13.8,17.9,2.3,2.3,0.0,6.0,20.1,37.4,53,19328.21,53423.84
270
+ 2019-05-30,20.4,13.2,16.8,14.4,14.4,0.0,9.0,17.0,42.8,63,13643.93,53502.95
271
+ 2019-05-31,25.0,17.1,21.1,0.4,0.4,0.0,4.0,14.6,29.2,51,49836.14,53579.02
272
+ 2019-06-01,25.9,15.6,19.9,0.0,0.0,0.0,0.0,17.4,37.4,3,49878.14,53651.92
273
+ 2019-06-02,25.9,16.2,20.8,1.8,1.8,0.0,5.0,20.1,46.1,55,42366.16,53721.55
274
+ 2019-06-03,20.8,11.9,16.7,0.0,0.0,0.0,0.0,25.8,50.4,2,49958.14,53787.8
275
+ 2019-06-04,21.4,10.9,16.3,0.0,0.0,0.0,0.0,18.7,37.1,3,49995.97,53850.55
276
+ 2019-06-05,26.2,16.2,21.0,6.4,6.4,0.0,5.0,21.2,42.8,63,29000.61,53909.7
277
+ 2019-06-06,27.2,18.9,23.1,8.8,8.8,0.0,4.0,19.3,35.6,63,50066.87,53965.12
278
+ 2019-06-07,25.5,16.7,20.5,0.0,0.0,0.0,0.0,16.9,33.8,3,45892.7,54016.7
279
+ 2019-06-08,25.9,15.3,20.3,0.0,0.0,0.0,0.0,21.4,42.1,3,50131.32,54064.52
280
+ 2019-06-09,23.7,13.6,18.5,0.0,0.0,0.0,0.0,22.1,43.9,3,50161.96,54108.94
281
+ 2019-06-10,20.5,13.6,17.6,15.8,15.8,0.0,18.0,23.9,43.2,61,0.0,54149.97
282
+ 2019-06-11,23.5,15.8,20.1,8.4,8.4,0.0,9.0,27.6,54.7,61,36240.15,54187.59
283
+ 2019-06-12,22.2,13.1,17.8,0.0,0.0,0.0,0.0,21.6,42.5,3,50247.64,54221.75
284
+ 2019-06-13,19.3,14.9,16.8,20.3,20.3,0.0,11.0,29.9,57.2,63,24408.88,54252.45
285
+ 2019-06-14,22.6,14.3,17.9,0.3,0.3,0.0,3.0,27.9,53.3,51,50298.79,54279.64
286
+ 2019-06-15,26.7,13.1,20.5,0.0,0.0,0.0,0.0,24.1,46.8,3,46391.4,54303.31
287
+ 2019-06-16,26.5,17.8,22.3,4.4,4.4,0.0,13.0,19.7,45.0,53,27321.03,54323.45
288
+ 2019-06-17,23.5,19.3,21.2,6.0,6.0,0.0,12.0,11.9,30.2,61,11254.65,54340.01
289
+ 2019-06-18,24.7,19.2,21.0,15.0,15.0,0.0,19.0,15.2,34.9,63,2926.9,54352.99
290
+ 2019-06-19,21.1,18.0,19.8,12.9,12.9,0.0,14.0,15.6,34.6,61,5531.7,54362.37
291
+ 2019-06-20,26.2,19.0,22.1,10.6,10.6,0.0,15.0,15.8,39.2,63,21182.92,54368.12
292
+ 2019-06-21,24.1,18.2,20.5,17.1,17.1,0.0,13.0,25.2,47.2,63,25238.35,54370.26
293
+ 2019-06-22,24.2,15.8,20.4,0.0,0.0,0.0,0.0,23.1,43.9,2,50400.0,54368.75
294
+ 2019-06-23,26.8,15.4,21.7,0.0,0.0,0.0,0.0,17.9,32.0,3,50400.0,54363.59
295
+ 2019-06-24,28.0,16.1,22.4,0.0,0.0,0.0,0.0,11.8,22.0,3,36018.93,54354.77
296
+ 2019-06-25,28.0,20.4,23.3,10.6,10.6,0.0,11.0,11.4,28.1,63,19588.62,54342.28
297
+ 2019-06-26,29.8,20.4,25.0,0.0,0.0,0.0,0.0,17.3,31.0,3,50400.0,54326.11
lr_attendance/prepare_for_modeling.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
4
+ from sklearn.model_selection import train_test_split
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+
8
"""Prepare the attendance feature dataset for multiple linear regression.

Run as a script, this module:

1. loads ``attendance_features_complete.csv``,
2. prints target statistics, missing-value counts and feature/target
   correlations,
3. imputes missing weather values, adds interaction / polynomial features
   and an integer-encoded ``season`` column,
4. splits into train/test sets (80/20, ``random_state=42``) and standardizes
   the features with a scaler fitted on the training set only,
5. writes ``train_data_scaled.csv``, ``test_data_scaled.csv`` and
   ``feature_info.json``.

The pure helpers below are importable without triggering any file I/O.
"""

import json

# ---------------------------------------------------------------------------
# Feature groups (column names expected in the input CSV)
# ---------------------------------------------------------------------------
temporal_features = [
    "day_of_week",
    "month",
    "quarter",
    "week_of_year",
    "day_of_month",
    "day_of_year",
    "is_weekend",
    "is_school_day",
    "is_month_start",
    "is_month_end",
    "is_friday",
    "is_monday",
    "school_year_progress",
]

holiday_features = ["is_holiday", "days_to_next_holiday", "days_since_last_holiday"]

weather_features = [
    "temp_max",
    "temp_min",
    "temp_mean",
    "temp_range",
    "precipitation_total",
    "rain_total",
    "snow_total",
    "precipitation_hours",
    "wind_speed_max",
    "wind_gust_max",
    "sunshine_duration",
    "daylight_duration",
    "is_rainy_day",
    "is_snowy_day",
    "is_windy_day",
    "is_extreme_temp",
    "weather_severity",
]

# School-level features (reported only; not part of the model inputs)
school_features = ["School DBN"]

# Target variable
target = "attendance_rate"


def categorize_attendance(rates):
    """Bucket attendance rates (in percent) into four ordered categories.

    Bins are [0, 85] -> "Poor", (85, 92] -> "Average", (92, 95] -> "Good",
    (95, 100] -> "Excellent".

    Args:
        rates: pandas Series (or array-like) of attendance percentages.

    Returns:
        A categorical Series/array with the category label of each rate;
        values outside [0, 100] become NaN.
    """
    return pd.cut(
        rates,
        bins=[0, 85, 92, 95, 100],
        labels=["Poor", "Average", "Good", "Excellent"],
        # Close the first bin on the left so a rate of exactly 0 is "Poor"
        # instead of silently becoming NaN.
        include_lowest=True,
    )


def select_high_corr_features(target_correlations, target_column, threshold=0.1):
    """Return the features whose absolute correlation exceeds ``threshold``.

    Args:
        target_correlations: Series mapping column name -> |correlation with
            the target|, in the order the result should preserve.
        target_column: name of the target; it is excluded because its
            self-correlation (always 1.0) is not a feature.
        threshold: strict lower bound on the absolute correlation.

    Returns:
        List of feature names, in the order of ``target_correlations``.
        NaN correlations never pass the comparison and are dropped.
    """
    feature_correlations = target_correlations.drop(
        labels=target_column, errors="ignore"
    )
    return feature_correlations[feature_correlations > threshold].index.tolist()


def fill_missing_with_median(frame, columns):
    """Return a copy of ``frame`` with NaNs in ``columns`` replaced by medians.

    Each listed column is filled with its own median; columns without
    missing values are left untouched. The input frame is not modified.
    """
    filled = frame.copy()
    for column in columns:
        if filled[column].isnull().any():
            filled[column] = filled[column].fillna(filled[column].median())
    return filled


def json_safe_correlations(target_correlations):
    """Convert a correlation Series to a dict that serializes to valid JSON.

    NaN correlations (produced e.g. by constant columns) are mapped to
    ``None`` so ``json.dump`` emits ``null`` rather than the non-standard
    ``NaN`` token.
    """
    return {
        name: (None if pd.isna(value) else float(value))
        for name, value in target_correlations.items()
    }


if __name__ == "__main__":
    # Load the complete feature dataset
    df = pd.read_csv("attendance_features_complete.csv")

    print("=== FEATURE ENGINEERING ANALYSIS ===")
    print(f"Dataset shape: {df.shape}")
    print("Target variable: attendance_rate")

    # Target variable statistics
    print("\n=== TARGET VARIABLE ANALYSIS ===")
    print("Attendance Rate Statistics:")
    print(df["attendance_rate"].describe())

    # Create target categories for analysis
    df["attendance_category"] = categorize_attendance(df["attendance_rate"])

    print("\nAttendance Categories:")
    print(df["attendance_category"].value_counts())

    print("\n=== FEATURE CATEGORIES ===")
    print(f"Temporal features: {len(temporal_features)}")
    print(f"Holiday features: {len(holiday_features)}")
    print(f"Weather features: {len(weather_features)}")
    print(f"School features: {len(school_features)}")

    # Check for missing values
    print("\n=== MISSING VALUES ANALYSIS ===")
    all_features = temporal_features + holiday_features + weather_features
    missing_analysis = df[all_features + [target]].isnull().sum()
    print(missing_analysis[missing_analysis > 0])

    # Correlation analysis (numeric columns only)
    print("\n=== CORRELATION ANALYSIS ===")
    numeric_features = df[all_features + [target]].select_dtypes(include=[np.number])
    correlation_matrix = numeric_features.corr()

    # Absolute correlation of every column with the target, strongest first
    target_correlations = correlation_matrix[target].abs().sort_values(ascending=False)
    print("Top 15 features correlated with attendance rate:")
    # Drop the target by name rather than by position: a feature tied at
    # |r| == 1.0 could otherwise be sorted ahead of it.
    print(target_correlations.drop(labels=target).head(15))

    # Candidate features for linear regression (|r| above 0.1, target excluded)
    high_corr_features = select_high_corr_features(target_correlations, target)
    print(f"\nFeatures with correlation > 0.1: {len(high_corr_features)}")
    print(high_corr_features)

    # Prepare data for modeling
    print("\n=== DATA PREPARATION FOR MODELING ===")

    # Handle missing values in weather features.
    # NOTE(review): medians are computed on the full dataset, i.e. before the
    # train/test split below, so test rows influence the imputed values.
    # Consider fitting the imputation on the training split only.
    df_clean = fill_missing_with_median(df, weather_features)

    # Create interaction features.
    # "temp_humidity_interaction" is actually temperature x precipitation; the
    # column name is kept because it is written to the output CSV/JSON files.
    df_clean["temp_humidity_interaction"] = (
        df_clean["temp_mean"] * df_clean["precipitation_total"]
    )
    df_clean["wind_precip_interaction"] = (
        df_clean["wind_speed_max"] * df_clean["precipitation_total"]
    )
    df_clean["holiday_weather_interaction"] = (
        df_clean["is_holiday"] * df_clean["weather_severity"]
    )

    # Polynomial features for important continuous variables
    df_clean["temp_squared"] = df_clean["temp_mean"] ** 2
    df_clean["precipitation_squared"] = df_clean["precipitation_total"] ** 2

    # Encode the categorical season column as integer codes
    le = LabelEncoder()
    df_clean["season_encoded"] = le.fit_transform(df_clean["season"])

    # Final feature list
    final_features = (
        temporal_features
        + holiday_features
        + weather_features
        + [
            "temp_humidity_interaction",
            "wind_precip_interaction",
            "holiday_weather_interaction",
            "temp_squared",
            "precipitation_squared",
            "season_encoded",
        ]
    )

    # Keep only columns that exist and are not object-typed
    final_features = [
        name
        for name in final_features
        if name in df_clean.columns and df_clean[name].dtype != "object"
    ]

    print(f"Final feature count for modeling: {len(final_features)}")

    # Build the design matrix and target vector
    X = df_clean[final_features]
    y = df_clean[target]

    # Remove rows with missing target
    mask = ~y.isnull()
    X = X[mask]
    y = y[mask]

    print(f"Final dataset shape for modeling: {X.shape}")

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    print(f"Training set: {X_train.shape}")
    print(f"Test set: {X_test.shape}")

    # Feature scaling: fit on the training set only, then apply to both
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Save prepared datasets
    train_data = pd.DataFrame(X_train_scaled, columns=final_features)
    train_data["attendance_rate"] = y_train.values

    test_data = pd.DataFrame(X_test_scaled, columns=final_features)
    test_data["attendance_rate"] = y_test.values

    train_data.to_csv("train_data_scaled.csv", index=False)
    test_data.to_csv("test_data_scaled.csv", index=False)

    # Save feature information
    feature_info = {
        "final_features": final_features,
        "temporal_features": temporal_features,
        "holiday_features": holiday_features,
        "weather_features": weather_features,
        # NaN correlations become null so the file is valid JSON
        "target_correlations": json_safe_correlations(target_correlations),
    }

    with open("feature_info.json", "w", encoding="utf-8") as f:
        json.dump(feature_info, f, indent=2)

    print("\n=== DATASETS SAVED ===")
    print("v train_data_scaled.csv - Training data with scaled features")
    print("v test_data_scaled.csv - Test data with scaled features")
    print("v feature_info.json - Feature metadata and correlations")

    print("\n=== FEATURE ENGINEERING SUMMARY ===")
    print(f"v Enhanced date column with {len(temporal_features)} temporal features")
    print(f"v Added {len(holiday_features)} holiday-related features")
    print(f"v Integrated {len(weather_features)} weather features")
    print("v Created interaction and polynomial features")
    print("v Final dataset ready for multiple linear regression")
    print(f"v Average attendance rate: {df['attendance_rate'].mean():.2f}%")
    print(f"v Features most correlated with attendance: {high_corr_features[:5]}")
lr_attendance/train.ipynb ADDED
@@ -0,0 +1,1140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 15,
6
+ "id": "ab55b6f5-72a8-42bb-ae90-53ec5bb79501",
7
+ "metadata": {
8
+ "scrolled": true
9
+ },
10
+ "outputs": [
11
+ {
12
+ "name": "stdout",
13
+ "output_type": "stream",
14
+ "text": [
15
+ "School DBN 0\n",
16
+ "Date 0\n",
17
+ "Enrolled 0\n",
18
+ "Absent 0\n",
19
+ "Present 0\n",
20
+ "Released 0\n",
21
+ "dtype: int64\n"
22
+ ]
23
+ },
24
+ {
25
+ "data": {
26
+ "text/html": [
27
+ "<div>\n",
28
+ "<style scoped>\n",
29
+ " .dataframe tbody tr th:only-of-type {\n",
30
+ " vertical-align: middle;\n",
31
+ " }\n",
32
+ "\n",
33
+ " .dataframe tbody tr th {\n",
34
+ " vertical-align: top;\n",
35
+ " }\n",
36
+ "\n",
37
+ " .dataframe thead th {\n",
38
+ " text-align: right;\n",
39
+ " }\n",
40
+ "</style>\n",
41
+ "<table border=\"1\" class=\"dataframe\">\n",
42
+ " <thead>\n",
43
+ " <tr style=\"text-align: right;\">\n",
44
+ " <th></th>\n",
45
+ " <th>Date</th>\n",
46
+ " <th>Enrolled</th>\n",
47
+ " <th>Absent</th>\n",
48
+ " <th>Present</th>\n",
49
+ " <th>Released</th>\n",
50
+ " </tr>\n",
51
+ " </thead>\n",
52
+ " <tbody>\n",
53
+ " <tr>\n",
54
+ " <th>count</th>\n",
55
+ " <td>2.771530e+05</td>\n",
56
+ " <td>277153.00000</td>\n",
57
+ " <td>277153.000000</td>\n",
58
+ " <td>277153.000000</td>\n",
59
+ " <td>277153.000000</td>\n",
60
+ " </tr>\n",
61
+ " <tr>\n",
62
+ " <th>mean</th>\n",
63
+ " <td>2.018665e+07</td>\n",
64
+ " <td>596.98617</td>\n",
65
+ " <td>50.503538</td>\n",
66
+ " <td>544.499403</td>\n",
67
+ " <td>1.983229</td>\n",
68
+ " </tr>\n",
69
+ " <tr>\n",
70
+ " <th>std</th>\n",
71
+ " <td>4.555413e+03</td>\n",
72
+ " <td>482.90966</td>\n",
73
+ " <td>54.329671</td>\n",
74
+ " <td>452.970313</td>\n",
75
+ " <td>35.114511</td>\n",
76
+ " </tr>\n",
77
+ " <tr>\n",
78
+ " <th>min</th>\n",
79
+ " <td>2.018090e+07</td>\n",
80
+ " <td>1.00000</td>\n",
81
+ " <td>0.000000</td>\n",
82
+ " <td>1.000000</td>\n",
83
+ " <td>0.000000</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>25%</th>\n",
87
+ " <td>2.018111e+07</td>\n",
88
+ " <td>329.00000</td>\n",
89
+ " <td>23.000000</td>\n",
90
+ " <td>291.000000</td>\n",
91
+ " <td>0.000000</td>\n",
92
+ " </tr>\n",
93
+ " <tr>\n",
94
+ " <th>50%</th>\n",
95
+ " <td>2.019013e+07</td>\n",
96
+ " <td>476.00000</td>\n",
97
+ " <td>38.000000</td>\n",
98
+ " <td>430.000000</td>\n",
99
+ " <td>0.000000</td>\n",
100
+ " </tr>\n",
101
+ " <tr>\n",
102
+ " <th>75%</th>\n",
103
+ " <td>2.019041e+07</td>\n",
104
+ " <td>684.00000</td>\n",
105
+ " <td>59.000000</td>\n",
106
+ " <td>640.000000</td>\n",
107
+ " <td>0.000000</td>\n",
108
+ " </tr>\n",
109
+ " <tr>\n",
110
+ " <th>max</th>\n",
111
+ " <td>2.019063e+07</td>\n",
112
+ " <td>5955.00000</td>\n",
113
+ " <td>2151.000000</td>\n",
114
+ " <td>5847.000000</td>\n",
115
+ " <td>5904.000000</td>\n",
116
+ " </tr>\n",
117
+ " </tbody>\n",
118
+ "</table>\n",
119
+ "</div>"
120
+ ],
121
+ "text/plain": [
122
+ " Date Enrolled Absent Present Released\n",
123
+ "count 2.771530e+05 277153.00000 277153.000000 277153.000000 277153.000000\n",
124
+ "mean 2.018665e+07 596.98617 50.503538 544.499403 1.983229\n",
125
+ "std 4.555413e+03 482.90966 54.329671 452.970313 35.114511\n",
126
+ "min 2.018090e+07 1.00000 0.000000 1.000000 0.000000\n",
127
+ "25% 2.018111e+07 329.00000 23.000000 291.000000 0.000000\n",
128
+ "50% 2.019013e+07 476.00000 38.000000 430.000000 0.000000\n",
129
+ "75% 2.019041e+07 684.00000 59.000000 640.000000 0.000000\n",
130
+ "max 2.019063e+07 5955.00000 2151.000000 5847.000000 5904.000000"
131
+ ]
132
+ },
133
+ "execution_count": 15,
134
+ "metadata": {},
135
+ "output_type": "execute_result"
136
+ }
137
+ ],
138
+ "source": [
139
+ "# import lib\n",
140
+ "import pandas as pd\n",
141
+ "from sklearn.model_selection import train_test_split\n",
142
+ "from sklearn.linear_model import LinearRegression\n",
143
+ "from sklearn.metrics import mean_absolute_error ,r2_score\n",
144
+ "from sklearn.linear_model import Ridge\n",
145
+ "import matplotlib.pyplot as plt\n",
146
+ "#data clean\n",
147
+ "\n",
148
+ "df = pd.read_csv(\"DailyPresence.csv\")\n",
149
+ "print(df.isnull().sum())\n",
150
+ "df.describe()\n"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": 16,
156
+ "id": "b403a040-92fc-440e-a0fc-4d7d5395c2f2",
157
+ "metadata": {},
158
+ "outputs": [],
159
+ "source": [
160
+ "# creating the variables \n",
161
+ "x = df[['Enrolled']]\n",
162
+ "y = df['Present']\n",
163
+ "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)\n"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": 17,
169
+ "id": "240452cf-351a-48e1-8f65-9052c8700094",
170
+ "metadata": {},
171
+ "outputs": [
172
+ {
173
+ "data": {
174
+ "text/html": [
175
+ "<style>#sk-container-id-4 {\n",
176
+ " /* Definition of color scheme common for light and dark mode */\n",
177
+ " --sklearn-color-text: #000;\n",
178
+ " --sklearn-color-text-muted: #666;\n",
179
+ " --sklearn-color-line: gray;\n",
180
+ " /* Definition of color scheme for unfitted estimators */\n",
181
+ " --sklearn-color-unfitted-level-0: #fff5e6;\n",
182
+ " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
183
+ " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
184
+ " --sklearn-color-unfitted-level-3: chocolate;\n",
185
+ " /* Definition of color scheme for fitted estimators */\n",
186
+ " --sklearn-color-fitted-level-0: #f0f8ff;\n",
187
+ " --sklearn-color-fitted-level-1: #d4ebff;\n",
188
+ " --sklearn-color-fitted-level-2: #b3dbfd;\n",
189
+ " --sklearn-color-fitted-level-3: cornflowerblue;\n",
190
+ "}\n",
191
+ "\n",
192
+ "#sk-container-id-4.light {\n",
193
+ " /* Specific color for light theme */\n",
194
+ " --sklearn-color-text-on-default-background: black;\n",
195
+ " --sklearn-color-background: white;\n",
196
+ " --sklearn-color-border-box: black;\n",
197
+ " --sklearn-color-icon: #696969;\n",
198
+ "}\n",
199
+ "\n",
200
+ "#sk-container-id-4.dark {\n",
201
+ " --sklearn-color-text-on-default-background: white;\n",
202
+ " --sklearn-color-background: #111;\n",
203
+ " --sklearn-color-border-box: white;\n",
204
+ " --sklearn-color-icon: #878787;\n",
205
+ "}\n",
206
+ "\n",
207
+ "#sk-container-id-4 {\n",
208
+ " color: var(--sklearn-color-text);\n",
209
+ "}\n",
210
+ "\n",
211
+ "#sk-container-id-4 pre {\n",
212
+ " padding: 0;\n",
213
+ "}\n",
214
+ "\n",
215
+ "#sk-container-id-4 input.sk-hidden--visually {\n",
216
+ " border: 0;\n",
217
+ " clip: rect(1px 1px 1px 1px);\n",
218
+ " clip: rect(1px, 1px, 1px, 1px);\n",
219
+ " height: 1px;\n",
220
+ " margin: -1px;\n",
221
+ " overflow: hidden;\n",
222
+ " padding: 0;\n",
223
+ " position: absolute;\n",
224
+ " width: 1px;\n",
225
+ "}\n",
226
+ "\n",
227
+ "#sk-container-id-4 div.sk-dashed-wrapped {\n",
228
+ " border: 1px dashed var(--sklearn-color-line);\n",
229
+ " margin: 0 0.4em 0.5em 0.4em;\n",
230
+ " box-sizing: border-box;\n",
231
+ " padding-bottom: 0.4em;\n",
232
+ " background-color: var(--sklearn-color-background);\n",
233
+ "}\n",
234
+ "\n",
235
+ "#sk-container-id-4 div.sk-container {\n",
236
+ " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
237
+ " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
238
+ " so we also need the `!important` here to be able to override the\n",
239
+ " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
240
+ " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
241
+ " display: inline-block !important;\n",
242
+ " position: relative;\n",
243
+ "}\n",
244
+ "\n",
245
+ "#sk-container-id-4 div.sk-text-repr-fallback {\n",
246
+ " display: none;\n",
247
+ "}\n",
248
+ "\n",
249
+ "div.sk-parallel-item,\n",
250
+ "div.sk-serial,\n",
251
+ "div.sk-item {\n",
252
+ " /* draw centered vertical line to link estimators */\n",
253
+ " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
254
+ " background-size: 2px 100%;\n",
255
+ " background-repeat: no-repeat;\n",
256
+ " background-position: center center;\n",
257
+ "}\n",
258
+ "\n",
259
+ "/* Parallel-specific style estimator block */\n",
260
+ "\n",
261
+ "#sk-container-id-4 div.sk-parallel-item::after {\n",
262
+ " content: \"\";\n",
263
+ " width: 100%;\n",
264
+ " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
265
+ " flex-grow: 1;\n",
266
+ "}\n",
267
+ "\n",
268
+ "#sk-container-id-4 div.sk-parallel {\n",
269
+ " display: flex;\n",
270
+ " align-items: stretch;\n",
271
+ " justify-content: center;\n",
272
+ " background-color: var(--sklearn-color-background);\n",
273
+ " position: relative;\n",
274
+ "}\n",
275
+ "\n",
276
+ "#sk-container-id-4 div.sk-parallel-item {\n",
277
+ " display: flex;\n",
278
+ " flex-direction: column;\n",
279
+ "}\n",
280
+ "\n",
281
+ "#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
282
+ " align-self: flex-end;\n",
283
+ " width: 50%;\n",
284
+ "}\n",
285
+ "\n",
286
+ "#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
287
+ " align-self: flex-start;\n",
288
+ " width: 50%;\n",
289
+ "}\n",
290
+ "\n",
291
+ "#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
292
+ " width: 0;\n",
293
+ "}\n",
294
+ "\n",
295
+ "/* Serial-specific style estimator block */\n",
296
+ "\n",
297
+ "#sk-container-id-4 div.sk-serial {\n",
298
+ " display: flex;\n",
299
+ " flex-direction: column;\n",
300
+ " align-items: center;\n",
301
+ " background-color: var(--sklearn-color-background);\n",
302
+ " padding-right: 1em;\n",
303
+ " padding-left: 1em;\n",
304
+ "}\n",
305
+ "\n",
306
+ "\n",
307
+ "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
308
+ "clickable and can be expanded/collapsed.\n",
309
+ "- Pipeline and ColumnTransformer use this feature and define the default style\n",
310
+ "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
311
+ "*/\n",
312
+ "\n",
313
+ "/* Pipeline and ColumnTransformer style (default) */\n",
314
+ "\n",
315
+ "#sk-container-id-4 div.sk-toggleable {\n",
316
+ " /* Default theme specific background. It is overwritten whether we have a\n",
317
+ " specific estimator or a Pipeline/ColumnTransformer */\n",
318
+ " background-color: var(--sklearn-color-background);\n",
319
+ "}\n",
320
+ "\n",
321
+ "/* Toggleable label */\n",
322
+ "#sk-container-id-4 label.sk-toggleable__label {\n",
323
+ " cursor: pointer;\n",
324
+ " display: flex;\n",
325
+ " width: 100%;\n",
326
+ " margin-bottom: 0;\n",
327
+ " padding: 0.5em;\n",
328
+ " box-sizing: border-box;\n",
329
+ " text-align: center;\n",
330
+ " align-items: center;\n",
331
+ " justify-content: center;\n",
332
+ " gap: 0.5em;\n",
333
+ "}\n",
334
+ "\n",
335
+ "#sk-container-id-4 label.sk-toggleable__label .caption {\n",
336
+ " font-size: 0.6rem;\n",
337
+ " font-weight: lighter;\n",
338
+ " color: var(--sklearn-color-text-muted);\n",
339
+ "}\n",
340
+ "\n",
341
+ "#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
342
+ " /* Arrow on the left of the label */\n",
343
+ " content: \"▸\";\n",
344
+ " float: left;\n",
345
+ " margin-right: 0.25em;\n",
346
+ " color: var(--sklearn-color-icon);\n",
347
+ "}\n",
348
+ "\n",
349
+ "#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
350
+ " color: var(--sklearn-color-text);\n",
351
+ "}\n",
352
+ "\n",
353
+ "/* Toggleable content - dropdown */\n",
354
+ "\n",
355
+ "#sk-container-id-4 div.sk-toggleable__content {\n",
356
+ " display: none;\n",
357
+ " text-align: left;\n",
358
+ " /* unfitted */\n",
359
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
360
+ "}\n",
361
+ "\n",
362
+ "#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
363
+ " /* fitted */\n",
364
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
365
+ "}\n",
366
+ "\n",
367
+ "#sk-container-id-4 div.sk-toggleable__content pre {\n",
368
+ " margin: 0.2em;\n",
369
+ " border-radius: 0.25em;\n",
370
+ " color: var(--sklearn-color-text);\n",
371
+ " /* unfitted */\n",
372
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
373
+ "}\n",
374
+ "\n",
375
+ "#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
376
+ " /* unfitted */\n",
377
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
378
+ "}\n",
379
+ "\n",
380
+ "#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
381
+ " /* Expand drop-down */\n",
382
+ " display: block;\n",
383
+ " width: 100%;\n",
384
+ " overflow: visible;\n",
385
+ "}\n",
386
+ "\n",
387
+ "#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
388
+ " content: \"▾\";\n",
389
+ "}\n",
390
+ "\n",
391
+ "/* Pipeline/ColumnTransformer-specific style */\n",
392
+ "\n",
393
+ "#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
394
+ " color: var(--sklearn-color-text);\n",
395
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
396
+ "}\n",
397
+ "\n",
398
+ "#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
399
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
400
+ "}\n",
401
+ "\n",
402
+ "/* Estimator-specific style */\n",
403
+ "\n",
404
+ "/* Colorize estimator box */\n",
405
+ "#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
406
+ " /* unfitted */\n",
407
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
408
+ "}\n",
409
+ "\n",
410
+ "#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
411
+ " /* fitted */\n",
412
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
413
+ "}\n",
414
+ "\n",
415
+ "#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
416
+ "#sk-container-id-4 div.sk-label label {\n",
417
+ " /* The background is the default theme color */\n",
418
+ " color: var(--sklearn-color-text-on-default-background);\n",
419
+ "}\n",
420
+ "\n",
421
+ "/* On hover, darken the color of the background */\n",
422
+ "#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
423
+ " color: var(--sklearn-color-text);\n",
424
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
425
+ "}\n",
426
+ "\n",
427
+ "/* Label box, darken color on hover, fitted */\n",
428
+ "#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
429
+ " color: var(--sklearn-color-text);\n",
430
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
431
+ "}\n",
432
+ "\n",
433
+ "/* Estimator label */\n",
434
+ "\n",
435
+ "#sk-container-id-4 div.sk-label label {\n",
436
+ " font-family: monospace;\n",
437
+ " font-weight: bold;\n",
438
+ " line-height: 1.2em;\n",
439
+ "}\n",
440
+ "\n",
441
+ "#sk-container-id-4 div.sk-label-container {\n",
442
+ " text-align: center;\n",
443
+ "}\n",
444
+ "\n",
445
+ "/* Estimator-specific */\n",
446
+ "#sk-container-id-4 div.sk-estimator {\n",
447
+ " font-family: monospace;\n",
448
+ " border: 1px dotted var(--sklearn-color-border-box);\n",
449
+ " border-radius: 0.25em;\n",
450
+ " box-sizing: border-box;\n",
451
+ " margin-bottom: 0.5em;\n",
452
+ " /* unfitted */\n",
453
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
454
+ "}\n",
455
+ "\n",
456
+ "#sk-container-id-4 div.sk-estimator.fitted {\n",
457
+ " /* fitted */\n",
458
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
459
+ "}\n",
460
+ "\n",
461
+ "/* on hover */\n",
462
+ "#sk-container-id-4 div.sk-estimator:hover {\n",
463
+ " /* unfitted */\n",
464
+ " background-color: var(--sklearn-color-unfitted-level-2);\n",
465
+ "}\n",
466
+ "\n",
467
+ "#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
468
+ " /* fitted */\n",
469
+ " background-color: var(--sklearn-color-fitted-level-2);\n",
470
+ "}\n",
471
+ "\n",
472
+ "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
473
+ "\n",
474
+ "/* Common style for \"i\" and \"?\" */\n",
475
+ "\n",
476
+ ".sk-estimator-doc-link,\n",
477
+ "a:link.sk-estimator-doc-link,\n",
478
+ "a:visited.sk-estimator-doc-link {\n",
479
+ " float: right;\n",
480
+ " font-size: smaller;\n",
481
+ " line-height: 1em;\n",
482
+ " font-family: monospace;\n",
483
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
484
+ " border-radius: 1em;\n",
485
+ " height: 1em;\n",
486
+ " width: 1em;\n",
487
+ " text-decoration: none !important;\n",
488
+ " margin-left: 0.5em;\n",
489
+ " text-align: center;\n",
490
+ " /* unfitted */\n",
491
+ " border: var(--sklearn-color-unfitted-level-3) 1pt solid;\n",
492
+ " color: var(--sklearn-color-unfitted-level-3);\n",
493
+ "}\n",
494
+ "\n",
495
+ ".sk-estimator-doc-link.fitted,\n",
496
+ "a:link.sk-estimator-doc-link.fitted,\n",
497
+ "a:visited.sk-estimator-doc-link.fitted {\n",
498
+ " /* fitted */\n",
499
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
500
+ " border: var(--sklearn-color-fitted-level-3) 1pt solid;\n",
501
+ " color: var(--sklearn-color-fitted-level-3);\n",
502
+ "}\n",
503
+ "\n",
504
+ "/* On hover */\n",
505
+ "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
506
+ ".sk-estimator-doc-link:hover,\n",
507
+ "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
508
+ ".sk-estimator-doc-link:hover {\n",
509
+ " /* unfitted */\n",
510
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
511
+ " border: var(--sklearn-color-fitted-level-0) 1pt solid;\n",
512
+ " color: var(--sklearn-color-unfitted-level-0);\n",
513
+ " text-decoration: none;\n",
514
+ "}\n",
515
+ "\n",
516
+ "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
517
+ ".sk-estimator-doc-link.fitted:hover,\n",
518
+ "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
519
+ ".sk-estimator-doc-link.fitted:hover {\n",
520
+ " /* fitted */\n",
521
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
522
+ " border: var(--sklearn-color-fitted-level-0) 1pt solid;\n",
523
+ " color: var(--sklearn-color-fitted-level-0);\n",
524
+ " text-decoration: none;\n",
525
+ "}\n",
526
+ "\n",
527
+ "/* Span, style for the box shown on hovering the info icon */\n",
528
+ ".sk-estimator-doc-link span {\n",
529
+ " display: none;\n",
530
+ " z-index: 9999;\n",
531
+ " position: relative;\n",
532
+ " font-weight: normal;\n",
533
+ " right: .2ex;\n",
534
+ " padding: .5ex;\n",
535
+ " margin: .5ex;\n",
536
+ " width: min-content;\n",
537
+ " min-width: 20ex;\n",
538
+ " max-width: 50ex;\n",
539
+ " color: var(--sklearn-color-text);\n",
540
+ " box-shadow: 2pt 2pt 4pt #999;\n",
541
+ " /* unfitted */\n",
542
+ " background: var(--sklearn-color-unfitted-level-0);\n",
543
+ " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
544
+ "}\n",
545
+ "\n",
546
+ ".sk-estimator-doc-link.fitted span {\n",
547
+ " /* fitted */\n",
548
+ " background: var(--sklearn-color-fitted-level-0);\n",
549
+ " border: var(--sklearn-color-fitted-level-3);\n",
550
+ "}\n",
551
+ "\n",
552
+ ".sk-estimator-doc-link:hover span {\n",
553
+ " display: block;\n",
554
+ "}\n",
555
+ "\n",
556
+ "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
557
+ "\n",
558
+ "#sk-container-id-4 a.estimator_doc_link {\n",
559
+ " float: right;\n",
560
+ " font-size: 1rem;\n",
561
+ " line-height: 1em;\n",
562
+ " font-family: monospace;\n",
563
+ " background-color: var(--sklearn-color-unfitted-level-0);\n",
564
+ " border-radius: 1rem;\n",
565
+ " height: 1rem;\n",
566
+ " width: 1rem;\n",
567
+ " text-decoration: none;\n",
568
+ " /* unfitted */\n",
569
+ " color: var(--sklearn-color-unfitted-level-1);\n",
570
+ " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
571
+ "}\n",
572
+ "\n",
573
+ "#sk-container-id-4 a.estimator_doc_link.fitted {\n",
574
+ " /* fitted */\n",
575
+ " background-color: var(--sklearn-color-fitted-level-0);\n",
576
+ " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
577
+ " color: var(--sklearn-color-fitted-level-1);\n",
578
+ "}\n",
579
+ "\n",
580
+ "/* On hover */\n",
581
+ "#sk-container-id-4 a.estimator_doc_link:hover {\n",
582
+ " /* unfitted */\n",
583
+ " background-color: var(--sklearn-color-unfitted-level-3);\n",
584
+ " color: var(--sklearn-color-background);\n",
585
+ " text-decoration: none;\n",
586
+ "}\n",
587
+ "\n",
588
+ "#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
589
+ " /* fitted */\n",
590
+ " background-color: var(--sklearn-color-fitted-level-3);\n",
591
+ "}\n",
592
+ "\n",
593
+ ".estimator-table {\n",
594
+ " font-family: monospace;\n",
595
+ "}\n",
596
+ "\n",
597
+ ".estimator-table summary {\n",
598
+ " padding: .5rem;\n",
599
+ " cursor: pointer;\n",
600
+ "}\n",
601
+ "\n",
602
+ ".estimator-table summary::marker {\n",
603
+ " font-size: 0.7rem;\n",
604
+ "}\n",
605
+ "\n",
606
+ ".estimator-table details[open] {\n",
607
+ " padding-left: 0.1rem;\n",
608
+ " padding-right: 0.1rem;\n",
609
+ " padding-bottom: 0.3rem;\n",
610
+ "}\n",
611
+ "\n",
612
+ ".estimator-table .parameters-table {\n",
613
+ " margin-left: auto !important;\n",
614
+ " margin-right: auto !important;\n",
615
+ " margin-top: 0;\n",
616
+ "}\n",
617
+ "\n",
618
+ ".estimator-table .parameters-table tr:nth-child(odd) {\n",
619
+ " background-color: #fff;\n",
620
+ "}\n",
621
+ "\n",
622
+ ".estimator-table .parameters-table tr:nth-child(even) {\n",
623
+ " background-color: #f6f6f6;\n",
624
+ "}\n",
625
+ "\n",
626
+ ".estimator-table .parameters-table tr:hover {\n",
627
+ " background-color: #e0e0e0;\n",
628
+ "}\n",
629
+ "\n",
630
+ ".estimator-table table td {\n",
631
+ " border: 1px solid rgba(106, 105, 104, 0.232);\n",
632
+ "}\n",
633
+ "\n",
634
+ "/*\n",
635
+ " `table td`is set in notebook with right text-align.\n",
636
+ " We need to overwrite it.\n",
637
+ "*/\n",
638
+ ".estimator-table table td.param {\n",
639
+ " text-align: left;\n",
640
+ " position: relative;\n",
641
+ " padding: 0;\n",
642
+ "}\n",
643
+ "\n",
644
+ ".user-set td {\n",
645
+ " color:rgb(255, 94, 0);\n",
646
+ " text-align: left !important;\n",
647
+ "}\n",
648
+ "\n",
649
+ ".user-set td.value {\n",
650
+ " color:rgb(255, 94, 0);\n",
651
+ " background-color: transparent;\n",
652
+ "}\n",
653
+ "\n",
654
+ ".default td {\n",
655
+ " color: black;\n",
656
+ " text-align: left !important;\n",
657
+ "}\n",
658
+ "\n",
659
+ ".user-set td i,\n",
660
+ ".default td i {\n",
661
+ " color: black;\n",
662
+ "}\n",
663
+ "\n",
664
+ "/*\n",
665
+ " Styles for parameter documentation links\n",
666
+ " We need styling for visited so jupyter doesn't overwrite it\n",
667
+ "*/\n",
668
+ "a.param-doc-link,\n",
669
+ "a.param-doc-link:link,\n",
670
+ "a.param-doc-link:visited {\n",
671
+ " text-decoration: underline dashed;\n",
672
+ " text-underline-offset: .3em;\n",
673
+ " color: inherit;\n",
674
+ " display: block;\n",
675
+ " padding: .5em;\n",
676
+ "}\n",
677
+ "\n",
678
+ "/* \"hack\" to make the entire area of the cell containing the link clickable */\n",
679
+ "a.param-doc-link::before {\n",
680
+ " position: absolute;\n",
681
+ " content: \"\";\n",
682
+ " inset: 0;\n",
683
+ "}\n",
684
+ "\n",
685
+ ".param-doc-description {\n",
686
+ " display: none;\n",
687
+ " position: absolute;\n",
688
+ " z-index: 9999;\n",
689
+ " left: 0;\n",
690
+ " padding: .5ex;\n",
691
+ " margin-left: 1.5em;\n",
692
+ " color: var(--sklearn-color-text);\n",
693
+ " box-shadow: .3em .3em .4em #999;\n",
694
+ " width: max-content;\n",
695
+ " text-align: left;\n",
696
+ " max-height: 10em;\n",
697
+ " overflow-y: auto;\n",
698
+ "\n",
699
+ " /* unfitted */\n",
700
+ " background: var(--sklearn-color-unfitted-level-0);\n",
701
+ " border: thin solid var(--sklearn-color-unfitted-level-3);\n",
702
+ "}\n",
703
+ "\n",
704
+ "/* Fitted state for parameter tooltips */\n",
705
+ ".fitted .param-doc-description {\n",
706
+ " /* fitted */\n",
707
+ " background: var(--sklearn-color-fitted-level-0);\n",
708
+ " border: thin solid var(--sklearn-color-fitted-level-3);\n",
709
+ "}\n",
710
+ "\n",
711
+ ".param-doc-link:hover .param-doc-description {\n",
712
+ " display: block;\n",
713
+ "}\n",
714
+ "\n",
715
+ ".copy-paste-icon {\n",
716
+ " background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);\n",
717
+ " background-repeat: no-repeat;\n",
718
+ " background-size: 14px 14px;\n",
719
+ " background-position: 0;\n",
720
+ " display: inline-block;\n",
721
+ " width: 14px;\n",
722
+ " height: 14px;\n",
723
+ " cursor: pointer;\n",
724
+ "}\n",
725
+ "</style><body><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Ridge(alpha=2.0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>Ridge</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html\">?<span>Documentation for Ridge</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\" data-param-prefix=\"\">\n",
726
+ " <div class=\"estimator-table\">\n",
727
+ " <details>\n",
728
+ " <summary>Parameters</summary>\n",
729
+ " <table class=\"parameters-table\">\n",
730
+ " <tbody>\n",
731
+ " \n",
732
+ " <tr class=\"user-set\">\n",
733
+ " <td><i class=\"copy-paste-icon\"\n",
734
+ " onclick=\"copyToClipboard('alpha',\n",
735
+ " this.parentElement.nextElementSibling)\"\n",
736
+ " ></i></td>\n",
737
+ " <td class=\"param\">\n",
738
+ " <a class=\"param-doc-link\"\n",
739
+ " rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=alpha,-%7Bfloat%2C%20ndarray%20of%20shape%20%28n_targets%2C%29%7D%2C%20default%3D1.0\">\n",
740
+ " alpha\n",
741
+ " <span class=\"param-doc-description\">alpha: {float, ndarray of shape (n_targets,)}, default=1.0<br><br>Constant that multiplies the L2 term, controlling regularization<br>strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.<br><br>When `alpha = 0`, the objective is equivalent to ordinary least<br>squares, solved by the :class:`LinearRegression` object. For numerical<br>reasons, using `alpha = 0` with the `Ridge` object is not advised.<br>Instead, you should use the :class:`LinearRegression` object.<br><br>If an array is passed, penalties are assumed to be specific to the<br>targets. Hence they must correspond in number.</span>\n",
742
+ " </a>\n",
743
+ " </td>\n",
744
+ " <td class=\"value\">2.0</td>\n",
745
+ " </tr>\n",
746
+ " \n",
747
+ "\n",
748
+ " <tr class=\"default\">\n",
749
+ " <td><i class=\"copy-paste-icon\"\n",
750
+ " onclick=\"copyToClipboard('fit_intercept',\n",
751
+ " this.parentElement.nextElementSibling)\"\n",
752
+ " ></i></td>\n",
753
+ " <td class=\"param\">\n",
754
+ " <a class=\"param-doc-link\"\n",
755
+ " rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=fit_intercept,-bool%2C%20default%3DTrue\">\n",
756
+ " fit_intercept\n",
757
+ " <span class=\"param-doc-description\">fit_intercept: bool, default=True<br><br>Whether to fit the intercept for this model. If set<br>to false, no intercept will be used in calculations<br>(i.e. ``X`` and ``y`` are expected to be centered).</span>\n",
758
+ " </a>\n",
759
+ " </td>\n",
760
+ " <td class=\"value\">True</td>\n",
761
+ " </tr>\n",
762
+ " \n",
763
+ "\n",
764
+ " <tr class=\"default\">\n",
765
+ " <td><i class=\"copy-paste-icon\"\n",
766
+ " onclick=\"copyToClipboard('copy_X',\n",
767
+ " this.parentElement.nextElementSibling)\"\n",
768
+ " ></i></td>\n",
769
+ " <td class=\"param\">\n",
770
+ " <a class=\"param-doc-link\"\n",
771
+ " rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=copy_X,-bool%2C%20default%3DTrue\">\n",
772
+ " copy_X\n",
773
+ " <span class=\"param-doc-description\">copy_X: bool, default=True<br><br>If True, X will be copied; else, it may be overwritten.</span>\n",
774
+ " </a>\n",
775
+ " </td>\n",
776
+ " <td class=\"value\">True</td>\n",
777
+ " </tr>\n",
778
+ " \n",
779
+ "\n",
780
+ " <tr class=\"default\">\n",
781
+ " <td><i class=\"copy-paste-icon\"\n",
782
+ " onclick=\"copyToClipboard('max_iter',\n",
783
+ " this.parentElement.nextElementSibling)\"\n",
784
+ " ></i></td>\n",
785
+ " <td class=\"param\">\n",
786
+ " <a class=\"param-doc-link\"\n",
787
+ " rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=max_iter,-int%2C%20default%3DNone\">\n",
788
+ " max_iter\n",
789
+ " <span class=\"param-doc-description\">max_iter: int, default=None<br><br>Maximum number of iterations for conjugate gradient solver.<br>For 'sparse_cg' and 'lsqr' solvers, the default value is determined<br>by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.<br>For 'lbfgs' solver, the default value is 15000.</span>\n",
790
+ " </a>\n",
791
+ " </td>\n",
792
+ " <td class=\"value\">None</td>\n",
793
+ " </tr>\n",
794
+ " \n",
795
+ "\n",
796
+ " <tr class=\"default\">\n",
797
+ " <td><i class=\"copy-paste-icon\"\n",
798
+ " onclick=\"copyToClipboard('tol',\n",
799
+ " this.parentElement.nextElementSibling)\"\n",
800
+ " ></i></td>\n",
801
+ " <td class=\"param\">\n",
802
+ " <a class=\"param-doc-link\"\n",
803
+ " rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=tol,-float%2C%20default%3D1e-4\">\n",
804
+ " tol\n",
805
+ " <span class=\"param-doc-description\">tol: float, default=1e-4<br><br>The precision of the solution (`coef_`) is determined by `tol` which<br>specifies a different convergence criterion for each solver:<br><br>- 'svd': `tol` has no impact.<br><br>- 'cholesky': `tol` has no impact.<br><br>- 'sparse_cg': norm of residuals smaller than `tol`.<br><br>- 'lsqr': `tol` is set as atol and btol of scipy.sparse.linalg.lsqr,<br> which control the norm of the residual vector in terms of the norms of<br> matrix and coefficients.<br><br>- 'sag' and 'saga': relative change of coef smaller than `tol`.<br><br>- 'lbfgs': maximum of the absolute (projected) gradient=max|residuals|<br> smaller than `tol`.<br><br>.. versionchanged:: 1.2<br> Default value changed from 1e-3 to 1e-4 for consistency with other linear<br> models.</span>\n",
806
+ " </a>\n",
807
+ " </td>\n",
808
+ " <td class=\"value\">0.0001</td>\n",
809
+ " </tr>\n",
810
+ " \n",
811
+ "\n",
812
+ " <tr class=\"default\">\n",
813
+ " <td><i class=\"copy-paste-icon\"\n",
814
+ " onclick=\"copyToClipboard('solver',\n",
815
+ " this.parentElement.nextElementSibling)\"\n",
816
+ " ></i></td>\n",
817
+ " <td class=\"param\">\n",
818
+ " <a class=\"param-doc-link\"\n",
819
+ " rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=solver,-%7B%27auto%27%2C%20%27svd%27%2C%20%27cholesky%27%2C%20%27lsqr%27%2C%20%27sparse_cg%27%2C%20%20%20%20%20%20%20%20%20%20%20%20%20%27sag%27%2C%20%27saga%27%2C%20%27lbfgs%27%7D%2C%20default%3D%27auto%27\">\n",
820
+ " solver\n",
821
+ " <span class=\"param-doc-description\">solver: {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga', 'lbfgs'}, default='auto'<br><br>Solver to use in the computational routines:<br><br>- 'auto' chooses the solver automatically based on the type of data.<br><br>- 'svd' uses a Singular Value Decomposition of X to compute the Ridge<br> coefficients. It is the most stable solver, in particular more stable<br> for singular matrices than 'cholesky' at the cost of being slower.<br><br>- 'cholesky' uses the standard :func:`scipy.linalg.solve` function to<br> obtain a closed-form solution.<br><br>- 'sparse_cg' uses the conjugate gradient solver as found in<br> :func:`scipy.sparse.linalg.cg`. As an iterative algorithm, this solver is<br> more appropriate than 'cholesky' for large-scale data<br> (possibility to set `tol` and `max_iter`).<br><br>- 'lsqr' uses the dedicated regularized least-squares routine<br> :func:`scipy.sparse.linalg.lsqr`. It is the fastest and uses an iterative<br> procedure.<br><br>- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses<br> its improved, unbiased version named SAGA. Both methods also use an<br> iterative procedure, and are often faster than other solvers when<br> both n_samples and n_features are large. Note that 'sag' and<br> 'saga' fast convergence is only guaranteed on features with<br> approximately the same scale. You can preprocess the data with a<br> scaler from :mod:`sklearn.preprocessing`.<br><br>- 'lbfgs' uses L-BFGS-B algorithm implemented in<br> :func:`scipy.optimize.minimize`. It can be used only when `positive`<br> is True.<br><br>All solvers except 'svd' support both dense and sparse data. However, only<br>'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when<br>`fit_intercept` is True.<br><br>.. versionadded:: 0.17<br> Stochastic Average Gradient descent solver.<br>.. versionadded:: 0.19<br> SAGA solver.</span>\n",
822
+ " </a>\n",
823
+ " </td>\n",
824
+ " <td class=\"value\">&#x27;auto&#x27;</td>\n",
825
+ " </tr>\n",
826
+ " \n",
827
+ "\n",
828
+ " <tr class=\"default\">\n",
829
+ " <td><i class=\"copy-paste-icon\"\n",
830
+ " onclick=\"copyToClipboard('positive',\n",
831
+ " this.parentElement.nextElementSibling)\"\n",
832
+ " ></i></td>\n",
833
+ " <td class=\"param\">\n",
834
+ " <a class=\"param-doc-link\"\n",
835
+ " rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=positive,-bool%2C%20default%3DFalse\">\n",
836
+ " positive\n",
837
+ " <span class=\"param-doc-description\">positive: bool, default=False<br><br>When set to ``True``, forces the coefficients to be positive.<br>Only 'lbfgs' solver is supported in this case.</span>\n",
838
+ " </a>\n",
839
+ " </td>\n",
840
+ " <td class=\"value\">False</td>\n",
841
+ " </tr>\n",
842
+ " \n",
843
+ "\n",
844
+ " <tr class=\"default\">\n",
845
+ " <td><i class=\"copy-paste-icon\"\n",
846
+ " onclick=\"copyToClipboard('random_state',\n",
847
+ " this.parentElement.nextElementSibling)\"\n",
848
+ " ></i></td>\n",
849
+ " <td class=\"param\">\n",
850
+ " <a class=\"param-doc-link\"\n",
851
+ " rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=random_state,-int%2C%20RandomState%20instance%2C%20default%3DNone\">\n",
852
+ " random_state\n",
853
+ " <span class=\"param-doc-description\">random_state: int, RandomState instance, default=None<br><br>Used when ``solver`` == 'sag' or 'saga' to shuffle the data.<br>See :term:`Glossary <random_state>` for details.<br><br>.. versionadded:: 0.17<br> `random_state` to support Stochastic Average Gradient.</span>\n",
854
+ " </a>\n",
855
+ " </td>\n",
856
+ " <td class=\"value\">None</td>\n",
857
+ " </tr>\n",
858
+ " \n",
859
+ " </tbody>\n",
860
+ " </table>\n",
861
+ " </details>\n",
862
+ " </div>\n",
863
+ " </div></div></div></div></div><script>function copyToClipboard(text, element) {\n",
864
+ " // Get the parameter prefix from the closest toggleable content\n",
865
+ " const toggleableContent = element.closest('.sk-toggleable__content');\n",
866
+ " const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';\n",
867
+ " const fullParamName = paramPrefix ? `${paramPrefix}${text}` : text;\n",
868
+ "\n",
869
+ " const originalStyle = element.style;\n",
870
+ " const computedStyle = window.getComputedStyle(element);\n",
871
+ " const originalWidth = computedStyle.width;\n",
872
+ " const originalHTML = element.innerHTML.replace('Copied!', '');\n",
873
+ "\n",
874
+ " navigator.clipboard.writeText(fullParamName)\n",
875
+ " .then(() => {\n",
876
+ " element.style.width = originalWidth;\n",
877
+ " element.style.color = 'green';\n",
878
+ " element.innerHTML = \"Copied!\";\n",
879
+ "\n",
880
+ " setTimeout(() => {\n",
881
+ " element.innerHTML = originalHTML;\n",
882
+ " element.style = originalStyle;\n",
883
+ " }, 2000);\n",
884
+ " })\n",
885
+ " .catch(err => {\n",
886
+ " console.error('Failed to copy:', err);\n",
887
+ " element.style.color = 'red';\n",
888
+ " element.innerHTML = \"Failed!\";\n",
889
+ " setTimeout(() => {\n",
890
+ " element.innerHTML = originalHTML;\n",
891
+ " element.style = originalStyle;\n",
892
+ " }, 2000);\n",
893
+ " });\n",
894
+ " return false;\n",
895
+ "}\n",
896
+ "\n",
897
+ "document.querySelectorAll('.copy-paste-icon').forEach(function(element) {\n",
898
+ " const toggleableContent = element.closest('.sk-toggleable__content');\n",
899
+ " const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';\n",
900
+ " const paramName = element.parentElement.nextElementSibling\n",
901
+ " .textContent.trim().split(' ')[0];\n",
902
+ " const fullParamName = paramPrefix ? `${paramPrefix}${paramName}` : paramName;\n",
903
+ "\n",
904
+ " element.setAttribute('title', fullParamName);\n",
905
+ "});\n",
906
+ "\n",
907
+ "\n",
908
+ "/**\n",
909
+ " * Adapted from Skrub\n",
910
+ " * https://github.com/skrub-data/skrub/blob/403466d1d5d4dc76a7ef569b3f8228db59a31dc3/skrub/_reporting/_data/templates/report.js#L789\n",
911
+ " * @returns \"light\" or \"dark\"\n",
912
+ " */\n",
913
+ "function detectTheme(element) {\n",
914
+ " const body = document.querySelector('body');\n",
915
+ "\n",
916
+ " // Check VSCode theme\n",
917
+ " const themeKindAttr = body.getAttribute('data-vscode-theme-kind');\n",
918
+ " const themeNameAttr = body.getAttribute('data-vscode-theme-name');\n",
919
+ "\n",
920
+ " if (themeKindAttr && themeNameAttr) {\n",
921
+ " const themeKind = themeKindAttr.toLowerCase();\n",
922
+ " const themeName = themeNameAttr.toLowerCase();\n",
923
+ "\n",
924
+ " if (themeKind.includes(\"dark\") || themeName.includes(\"dark\")) {\n",
925
+ " return \"dark\";\n",
926
+ " }\n",
927
+ " if (themeKind.includes(\"light\") || themeName.includes(\"light\")) {\n",
928
+ " return \"light\";\n",
929
+ " }\n",
930
+ " }\n",
931
+ "\n",
932
+ " // Check Jupyter theme\n",
933
+ " if (body.getAttribute('data-jp-theme-light') === 'false') {\n",
934
+ " return 'dark';\n",
935
+ " } else if (body.getAttribute('data-jp-theme-light') === 'true') {\n",
936
+ " return 'light';\n",
937
+ " }\n",
938
+ "\n",
939
+ " // Guess based on a parent element's color\n",
940
+ " const color = window.getComputedStyle(element.parentNode, null).getPropertyValue('color');\n",
941
+ " const match = color.match(/^rgb\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)\\s*$/i);\n",
942
+ " if (match) {\n",
943
+ " const [r, g, b] = [\n",
944
+ " parseFloat(match[1]),\n",
945
+ " parseFloat(match[2]),\n",
946
+ " parseFloat(match[3])\n",
947
+ " ];\n",
948
+ "\n",
949
+ " // https://en.wikipedia.org/wiki/HSL_and_HSV#Lightness\n",
950
+ " const luma = 0.299 * r + 0.587 * g + 0.114 * b;\n",
951
+ "\n",
952
+ " if (luma > 180) {\n",
953
+ " // If the text is very bright we have a dark theme\n",
954
+ " return 'dark';\n",
955
+ " }\n",
956
+ " if (luma < 75) {\n",
957
+ " // If the text is very dark we have a light theme\n",
958
+ " return 'light';\n",
959
+ " }\n",
960
+ " // Otherwise fall back to the next heuristic.\n",
961
+ " }\n",
962
+ "\n",
963
+ " // Fallback to system preference\n",
964
+ " return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';\n",
965
+ "}\n",
966
+ "\n",
967
+ "\n",
968
+ "function forceTheme(elementId) {\n",
969
+ " const estimatorElement = document.querySelector(`#${elementId}`);\n",
970
+ " if (estimatorElement === null) {\n",
971
+ " console.error(`Element with id ${elementId} not found.`);\n",
972
+ " } else {\n",
973
+ " const theme = detectTheme(estimatorElement);\n",
974
+ " estimatorElement.classList.add(theme);\n",
975
+ " }\n",
976
+ "}\n",
977
+ "\n",
978
+ "forceTheme('sk-container-id-4');</script></body>"
979
+ ],
980
+ "text/plain": [
981
+ "Ridge(alpha=2.0)"
982
+ ]
983
+ },
984
+ "execution_count": 17,
985
+ "metadata": {},
986
+ "output_type": "execute_result"
987
+ }
988
+ ],
989
+ "source": [
990
+ "#creation of the model\n",
991
+ "model = LinearRegression()\n",
992
+ "model.fit(x_train,y_train)"
993
+ ]
994
+ },
995
+ {
996
+ "cell_type": "code",
997
+ "execution_count": 18,
998
+ "id": "8ccc5166-fb20-44a7-95bc-925e5b3c8f4d",
999
+ "metadata": {},
1000
+ "outputs": [
1001
+ {
1002
+ "name": "stdout",
1003
+ "output_type": "stream",
1004
+ "text": [
1005
+ "la valeur de a est : [0.9314094]\n",
1006
+ "la valeur de b est -11.480004226844926\n"
1007
+ ]
1008
+ }
1009
+ ],
1010
+ "source": [
1011
+ "#y=ax+b\n",
1012
+ "print('la valeur de a est :',model.coef_)\n",
1013
+ "print('la valeur de b est ',model.intercept_)"
1014
+ ]
1015
+ },
1016
+ {
1017
+ "cell_type": "markdown",
1018
+ "id": "93239f43-5f1f-4a35-9479-0b9de2752743",
1019
+ "metadata": {},
1020
+ "source": [
1021
+ "### 🧠 Interprétation des Résultats\n",
1022
+ "\n",
1023
+ "Voici ce que les mathématiques racontent sur la réalité de nos écoles :\n",
1024
+ "\n",
1025
+ "Mon modèle a trouvé l'équation suivante :\n",
1026
+ "$$Absents = 0.066 \\times Inscrits + 11.12$$\n",
1027
+ "\n",
1028
+ "**1. Le Coefficient ($a \\approx 0.066$) : Le Taux d'Absentéisme**\n",
1029
+ "* C'est la pente de la droite.\n",
1030
+ "* Cela signifie que **pour chaque nouvel élève inscrit**, le nombre d'absents augmente d'environ **0.066**.\n",
1031
+ "* *En clair :* Sur un groupe de 100 élèves, on peut s'attendre statistiquement à ce qu'environ **6 ou 7** soient absents ($100 \\times 0.066 = 6.6$).\n",
1032
+ "\n",
1033
+ "**2. L'Intercept ($b \\approx 11.12$) : Le Calibrage**\n",
1034
+ "* C'est l'ordonnée à l'origine (le point de départ de la droite).\n",
1035
+ "* Théoriquement, cela voudrait dire qu'une école avec **0 élève** aurait quand même **11 absents**.\n",
1036
+ "* *En réalité :* C'est physiquement impossible, mais c'est un ajustement mathématique nécessaire pour que la ligne droite passe au mieux au milieu du nuage de points."
1037
+ ]
1038
+ },
1039
+ {
1040
+ "cell_type": "code",
1041
+ "execution_count": 19,
1042
+ "id": "84752c5b-601b-4923-9774-6c5d05d0a212",
1043
+ "metadata": {},
1044
+ "outputs": [
1045
+ {
1046
+ "name": "stdout",
1047
+ "output_type": "stream",
1048
+ "text": [
1049
+ "[ 221.37234507 183.18455978 497.06952663 ... 379.71194259 384.36898957\n",
1050
+ " 1263.6194605 ]\n"
1051
+ ]
1052
+ }
1053
+ ],
1054
+ "source": [
1055
+ "#prediction\n",
1056
+ "predict = model.predict(x_test)\n",
1057
+ "print(predict)\n"
1058
+ ]
1059
+ },
1060
+ {
1061
+ "cell_type": "code",
1062
+ "execution_count": 23,
1063
+ "id": "4c4cc377-b7a5-4640-baac-097637abfe12",
1064
+ "metadata": {},
1065
+ "outputs": [
1066
+ {
1067
+ "data": {
1068
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAiRRJREFUeJzt3XlcVFX/wPHPAIICAoosKoj7vmsZpqlJ4lYZWm6plWW5lLimbWqbZptmaam/R3tKs0xK09RIcTcrU3PLTDFXcAVcQYbz++M+M3JhgBmYYVi+79frvnDOPffec++MzJezGpRSCiGEEEKIUszF2QUQQgghhHA2CYiEEEIIUepJQCSEEEKIUk8CIiGEEEKUehIQCSGEEKLUk4BICCGEEKWeBERCCCGEKPUkIBJCCCFEqScBkdDZsmULr7/+OsnJyc4uiihCVq1axTvvvMPt27edXRQhhHAICYiE2b///kuvXr0oX748vr6+Vh1TvXp1nnjiCfPrTZs2YTAY2LRpk93KZTAYmDp1qt3OV1BPPPEE1atXt9v5pk6disFgsNv57O3333+nX79+1K5dmzJlyji7OIUmIyODxo0b89Zbbzm7KA5h78+xo61btw5vb28uXLhg03EjRozggQcecFCpxD333MPEiROdXQy7kICoGFu8eDEGg8G8lS1blrp16zJq1CgSExNtOtft27fp27cvTzzxBGPGjHFQiXP2448/FqmgpzgzBVimzdPTk4YNG/LKK6+QkpJi07mSkpJ47LHHmDFjBr1793ZQiYumr776ilOnTjFq1CiL++fOnYvBYKBNmzYW9x86dIipU6dy4sQJi8cuXrzYjqUt+bp27Urt2rWZPn261cfEx8ezcOFCXnrpJXPaiRMndP8/XFxcqFixIt26dWPnzp3ZzrFhwwaeeuop6tati6enJzVr1uTpp5/m3LlzdrkvWyUlJTFs2DACAgLw8vKiU6dO/PHHH1Yf//HHH9OgQQM8PDyoWrUqY8eO5fr169nynTt3jmHDhlGjRg3KlStHrVq1GDt2LJcuXdLle/HFF/nkk09ISEgo8L05nRLF1qJFixSgXn/9dfXFF1+oBQsWqCFDhigXFxdVo0YNdf36davPtWfPHvXhhx+qjIwMm8oQFhamhgwZYn5tNBrVzZs3ldFotOk8I0eOVDl9HG/evKlu375t0/kcaciQISosLMxu55syZUqO916Q882bN0998cUXat68eeqRRx5RgAoPD7fpPY6Li1P/93//Z7eyFSfNmjVTw4YNy3F/27ZtVfXq1RWgjh49mm3/8uXLFaDi4uKy7WvUqJHq0KGDHUtrO3t/jgvD3Llzlaenp0pJSbEq/+jRo1XdunV1afHx8QpQ/fv3V1988YVavHixeumll5Sfn5/y8PBQf/75py5/q1atVI0aNdTEiRPVggUL1OTJk1X58uVVUFCQOnfunN3uzRpGo1G1bdtWeXl5qalTp6qPP/5YNWzYUJUvX179/fffeR4/ceJEBag+ffqoefPmqeeff165ubmpLl266PJdvXpVhYWFqUqVKqnXXntNLViwQI0aNUqVKVNGNW/eXPf73Wg0quDgYPXqq6/a/X4LmwRExZgpIPrtt9906WPHjlWAWrp0aY7HXrt2zS5lyBoQ5VduAVFRU1wCogsXLujSo6KiFKB27NiR47G2BNEl2R9//KEA9fPPP1vcf/z4cQWomJgYFRAQoKZOnZotjwRE9peYmKhcXV2tCtLT0tJUpUqV1CuvvKJLNwVE7777ri597dq1ClDDhw/XpW/evDnbH3ibN29WgHr55ZfzeSf58/XXXytALV++3Jx2/vx55efnp/r375/rsWfPnlVubm5q0KBBuvQ5c+YoQK1atcqctmTJEgWo1atX6/K+9tprClB//PGHLn3UqFEqLCzM5j+oixppMiuB7r//fkCrLgatr4C3tzfHjh2je/fulC9fnoEDBwJaP4lZs2bR
qFEjypYtS1BQEM8++yxXrlzRnVMpxZtvvklISAienp506tSJgwcPZrt2Tn2Idu3aRffu3alQoQJeXl40bdqU2bNnm8v3ySefAOiqsk0s9SHas2cP3bp1w8fHB29vbzp37swvv/yiy2NqUty+fTtjx441VzE/8sgjVvdD+P7772ncuDFly5alcePGfPfddxbzWfscrbVo0SLuv/9+AgMD8fDwoGHDhsybNy9f5zLJ+rno2LEjjRs3Zvfu3dx33314enqamxZSU1OZMmUKtWvXxsPDg9DQUCZOnEhqaqrunLGxsbRr1w4/Pz+8vb2pV6+ernnClnMZDAZGjRplfuYeHh40atSIdevWZbuXM2fOMHToUKpUqYKHhwc1atRg+PDhpKWlmfMkJSURHR1NaGgoHh4e1K5dm3feeYeMjIw8n9X333+Pu7s79913n8X9S5YsoUKFCvTo0YM+ffqwZMkS3f7Fixfz6KOPAtCpUyfzZ3rTpk1Ur16dgwcPsnnzZnN6x44dbSq3qdnnvffeY/78+dSqVQsPDw/uuusufvvtN4v3Y8/PcfXq1enZsyfbtm3j7rvvpmzZstSsWZP//ve/2c6ZlJTEmDFjqF69Oh4eHoSEhDB48GAuXrzItWvX8PLyYvTo0dmOO336NK6urromssDAQJo2bcrKlSstlj+zbdu2cfHiRSIiIvLMC9C+fXsAjh07pku/7777cHFxyZZWsWJFDh8+bNW57eXbb78lKCiIqKgoc1pAQACPPfYYK1euzPZ/KrOdO3eSnp5Ov379dOmm18uWLTOnmZrWg4KCdHkrV64MQLly5XTpDzzwAP/++y979+61/aaKEDdnF0DYn+k/tL+/vzktPT2dyMhI2rVrx3vvvYenpycAzz77LIsXL+bJJ5/khRdeID4+no8//pg9e/awfft2cyfa1157jTfffJPu3bvTvXt3/vjjD7p06aL7AspJbGwsPXv2pHLlyowePZrg4GAOHz7M6tWrGT16NM8++yxnz54lNjaWL774Is/zHTx4kPbt2+Pj48PEiRMpU6YMn332GR07dmTz5s3Z+nQ8//zzVKhQgSlTpnDixAlmzZrFqFGj+Prrr3O9zk8//UTv3r1p2LAh06dP59KlSzz55JOEhIRky2vtc7TWvHnzaNSoEQ899BBubm788MMPjBgxgoyMDEaOHGnTuUwsfS4uXbpEt27d6NevH48//jhBQUFkZGTw0EMPsW3bNoYNG0aDBg3Yv38/H374IX///Tfff/89oL0PPXv2pGnTprz++ut4eHjwzz//sH37dvP5rT2XybZt24iJiWHEiBGUL1+ejz76iN69e3Py5Elzuc+ePcvdd99t7ktRv359zpw5w7fffsuNGzdwd3fnxo0bdOjQgTNnzvDss89SrVo1duzYweTJkzl37hyzZs3K9Vnt2LGDxo0b5/i+LVmyhKioKNzd3enfvz/z5s3jt99+46677gK0L8wXXniBjz76iJdeeokGDRoA0KBBA2bNmsXzzz+Pt7c3L7/8MnDni8fWci9dupSrV6/y7LPPYjAYmDlzJlFRURw/ftxcdkd9jv/55x/69OnD0KFDGTJkCP/5z3944oknaNWqFY0aNQLg2rVrtG/fnsOHD/PUU0/RsmVLLl68yKpVqzh9+jTNmzfnkUce4euvv+aDDz7A1dXVfP6vvvoKpZT5jzeTVq1aZfvc5PQeGgwGWrRokWdewNzXq0KFCnnmvXbtGteuXaNSpUp55r1x4wY3btzIM5+rq2ue196zZw8tW7bMFqDdfffdzJ8/n7///psmTZpYPNYULGUNZkzfBbt37zanmYLA0aNH8/777xMSEsKff/7JW2+9Ra9evahfv77uHK1atQJg+/btVj/vIsnZVVQi/0xNZj///LO6cOGCOnXqlFq2bJny9/dX5cqVU6dPn1ZKaVXjgJo0aZLu+K1btypALVmyRJe+bt06Xfr58+eVu7u76tGjh65K9KWXXlKArsks
Li5O10yQnp6uatSoocLCwtSVK1d018l8rtyazAA1ZcoU8+tevXopd3d3dezYMXPa2bNnVfny5dV9992X7flERETorjVmzBjl6uqqkpKSLF7PpHnz5qpy5cq6fD/99JMCdE0N1j7HnFhqMrtx40a2fJGRkapmzZq5nivz+Y4cOaIuXLig4uPj1WeffaY8PDxUUFCQuVmsQ4cOClCffvqp7vgvvvhCubi4qK1bt+rSP/30UwWo7du3K6WU+vDDDy02zeXnXEpp77O7u7v6559/zGn79u1TgJozZ445bfDgwcrFxSVbU7FSdz5Tb7zxhvLy8srWr2LSpEnK1dVVnTx5MscyK6VUSEiI6t27t8V9v//+uwJUbGys+ZohISFq9OjRunz5aTKzttymZh9/f391+fJlc76VK1cqQP3www/mNEd8jsPCwhSgtmzZYk47f/688vDwUOPGjTOnmZpYYmJist2r6b1av369AtTatWt1+5s2bWrxGb399tsKUImJidn2Zfb4448rf3//bOmmZzdt2jR14cIFlZCQoLZu3aruuuuubM1ROXnjjTcUoDZs2JBnXtP/x7w2a5ovvby81FNPPZUtfc2aNQpQ69aty/HY3bt3K0C98cYbunTT++vt7a1LX7hwofLz89OVcciQITn253R3d8/W3FjcSJNZCRAREUFAQAChoaH069cPb29vvvvuO6pWrarLN3z4cN3r5cuX4+vrywMPPMDFixfNW6tWrfD29iYuLg6An3/+mbS0NJ5//nldU1Z0dHSeZduzZw/x8fFER0fj5+en25efoeZGo5GffvqJXr16UbNmTXN65cqVGTBgANu2bcs2kmrYsGG6a7Vv3x6j0ci///6b43XOnTvH3r17GTJkiG4KggceeICGDRvq8lr7HG2R+a+45ORkLl68SIcOHTh+/LjVc0TVq1ePgIAAatSowbPPPkvt2rVZs2aN+S9CAA8PD5588sls99OgQQPq16+vux9Tk5vpfkzv58qVK3NshrL2XCYRERHUqlXL/Lpp06b4+Phw/PhxQKtx+v7773nwwQdp3bp1tuuZ3ufly5fTvn17KlSooLtuREQERqORLVu25PrsLl26lONf60uWLCEoKIhOnTqZr9m3b1+WLVuG0WjM9bx5sbXcffv21ZXT1Oxjel6O/Bw3bNjQfD3Qmm7q1atnvjbAihUraNasGY888ki2ezW9VxEREVSpUkXX7HjgwAH+/PNPHn/88WzHme734sWLlh6hWW7vIcCUKVMICAggODjYXIv1/vvv06dPn1zPu2XLFqZNm8Zjjz1m/hznZvDgwcTGxua5ZW12teTmzZt4eHhkSy9btqx5f05atmxJmzZteOedd1i0aBEnTpxg7dq1PPvss5QpUybbsVWrVuXuu+9m1qxZfPfdd4wdO5YlS5YwadIki+c3fWaLM2kyKwE++eQT6tati5ubG0FBQdSrVy9blaqbm1u2KvKjR4+SnJxMYGCgxfOeP38ewBw41KlTR7c/ICAgzypeUzNN48aNrb+hXFy4cIEbN25Qr169bPsaNGhARkYGp06dMlfZA1SrVk2Xz1Tm3Pr35HTPoAUamYe5WvscbbF9+3amTJnCzp07s1W3JycnWzVP1IoVK/Dx8aFMmTKEhIToAg2TqlWr4u7urks7evQohw8fJiAgwOJ5TffTt29fFi5cyNNPP82kSZPo3LkzUVFR9OnTx/z5s/ZcJlnfK9DeL9N7deHCBVJSUvL8PB09epQ///zT6utaopTKlmY0Glm2bBmdOnUy98UCaNOmDe+//z4bNmygS5cueZ7bXuXO67PtyM9xXu8VaP//85quwcXFhYEDBzJv3jxu3LiBp6cnS5YsoWzZsuZ+WJmZ3hdr/qCy9B6aDBs2jEcffZRbt26xceNGPvroozwD2r/++otHHnmExo0bs3DhwjyvD1CzZk3dH28FUa5cOYv9hG7dumXen5sVK1bQt29fnnrqKUBrphs7diybN2/m
yJEj5nzbt2+nZ8+e/PLLL+Y/PHr16oWPjw/Tpk3jqaeeyhZQK6WK9Hxq1pCAqAS4++67Lf61nJmHh0e2ICkjI4PAwMAc/zLJ6ZdycZO5X0Jmuf2ytIW9n+OxY8fo3Lkz9evX54MPPiA0NBR3d3d+/PFHPvzwQ6s6BYPWDyCvPg6WfoFmZGTQpEkTPvjgA4vHhIaGmo/dsmULcXFxrFmzhnXr1vH1119z//3389NPP+Hq6mr1uUzs9V5lZGTwwAMP5DhhXN26dXM93t/f32LAvHHjRs6dO8eyZct0nVBNlixZUqCAyNZy2/Ozbevn2J7XHjx4MO+++y7ff/89/fv3Z+nSpfTs2dNi4G96X/L6bOf0HprUqVPH3OG6Z8+euLq6MmnSJDp16mTx9+mpU6fo0qULvr6+/Pjjj5QvX96qezP1N8qLq6trnr8rKleubHH+I1NalSpVcj2+atWqbNu2jaNHj5KQkECdOnUIDg6mSpUqus/WZ599RlBQULbn8NBDDzF16lR27NiRLSBKSkqyqk9VUSYBUSlWq1Ytfv75Z+69995c/7IICwsDtL8gM/+lc+HChTxHUZlqJQ4cOJDraA9r/7IICAjA09NT99eMyV9//YWLi0u2L9n8yHzPWWW9trXP0Vo//PADqamprFq1SvdXeH6a3vKjVq1a7Nu3j86dO+f5vri4uNC5c2c6d+7MBx98wNtvv83LL79MXFycufnL2nNZIyAgAB8fHw4cOJDnPVy7ds3qEUZZ1a9fX1cDZLJkyRICAwPNoyIzi4mJ4bvvvuPTTz+lXLlyud5vTvsKWu6snPk5Np0zr/cKtBrkFi1asGTJEkJCQjh58iRz5syxmDc+Pp5KlSrlGTzUr1+fJUuWWF2j+vLLL7NgwQJeeeWVbCMbL126RJcuXUhNTWXDhg3m0VbWeO+995g2bVqe+cLCwixO4plZ8+bN2bp1KxkZGbo/cHft2oWnp2eegb5JnTp1zLWGhw4d4ty5c7oVBxITEy3WlpmW7klPT9elnzlzhrS0NPPggeJK+hCVYo899hhGo5E33ngj27709HSSkpIArY2/TJkyzJkzR/fXX14jdUBrt65RowazZs0yn88k87m8vLwAsuXJytXVlS5durBy5UrdL4/ExESWLl1Ku3bt8PHxybNcealcuTLNmzfn888/1/XZiY2N5dChQ7q81j5Ha5n+8s78fJKTk1m0aJFN58mvxx57jDNnzrBgwYJs+27evGme1fby5cvZ9jdv3hy4M6LF2nNZy8XFhV69evHDDz/w+++/Z9tvemaPPfYYO3fuZP369dnyJCUlZfuFnlV4eDgHDhzQNU/cvHmTmJgYevbsSZ8+fbJto0aN4urVq6xatQrI/TPt5eVlMb2g5c7KmZ9jgN69e7Nv3z6Lw/yz1iQNGjSIn376iVmzZuHv70+3bt0snnP37t2Eh4fnee3w8HCUUrrRU7nx8/Pj2WefZf369brh49evX6d79+6cOXOGH3/80WLzY27s2YeoT58+JCYmEhMTY067ePEiy5cv58EHH9T1Lzp27Fi2KQSyysjIYOLEiXh6evLcc8+Z0+vWrUtiYmK26VO++uorgGwjyUzPuG3btnneQ1EmNUSlWIcOHXj22WeZPn06e/fupUuXLpQpU4ajR4+yfPlyZs+eTZ8+fQgICGD8+PFMnz6dnj170r17d/bs2cPatWvzrCJ1cXFh3rx5PPjggzRv3pwnn3ySypUr89dff3Hw4EHzL37TsM0XXniByMhIXF1ds82XYfLmm2+a578ZMWIEbm5ufPbZZ6SmpjJz5ky7PZ/p06fTo0cP2rVrx1NPPcXly5eZM2cOjRo10lWBW/scrdWlSxfc3d158MEHefbZZ7l27RoLFiwgMDCwUJYLGDRoEN988w3PPfcccXFx3HvvvRiNRv766y+++eYb1q9fT+vWrXn99dfZsmULPXr0ICwsjPPnzzN37lxCQkJo166dTeeyxdtvv81PP/1E
hw4dzEP5z507x/Lly9m2bRt+fn5MmDCBVatW0bNnT/NQ8OvXr7N//36+/fZbTpw4ketn9+GHH+aNN95g8+bN5iawVatWcfXqVR566CGLx9xzzz0EBASwZMkS+vbtS/PmzXF1deWdd94hOTkZDw8P89xSrVq1Yt68ebz55pvUrl2bwMBA7r///gKX2xJnfY4BJkyYwLfffsujjz7KU089RatWrbh8+TKrVq3i008/pVmzZua8AwYMYOLEiXz33XcMHz7c4pQH58+f588//7Rq6ol27drh7+/Pzz//bFXnZ4DRo0cza9YsZsyYYW4SHThwIL/++itPPfUUhw8f1s095O3tTa9evXI9pz37EPXp04d77rmHJ598kkOHDlGpUiXmzp2L0WjMVgvVuXNnAN0fjqNHj+bWrVs0b96c27dvs3TpUn799Vc+//xzXW30qFGjWLRoEQ8++CDPP/88YWFhbN68ma+++ooHHngg29QmsbGxVKtWrXgPuQcZdl+c5TRTdVZDhgxRXl5eOe6fP3++atWqlSpXrpwqX768atKkiZo4caI6e/asOY/RaFTTpk1TlStXVuXKlVMdO3ZUBw4cyDZTddZh9ybbtm1TDzzwgCpfvrzy8vJSTZs21Q2lTk9PV88//7wKCAhQBoNBNwydLMPuldJmEo6MjFTe3t7K09NTderUKdsMzDk9n5zKaMmKFStUgwYNlIeHh2rYsKGKiYnJcYZfa56jJZaG3a9atUo1bdpUlS1bVlWvXl2988476j//+Y8CVHx8vFXny204vFLasPtGjRpZ3JeWlqbeeecd1ahRI+Xh4aEqVKigWrVqpaZNm6aSk5OVUkpt2LBBPfzww6pKlSrK3d1dValSRfXv3z/bkHFrzqWU9j6PHDkyW1kszYb+77//qsGDB6uAgADl4eGhatasqUaOHKlSU1PNea5evaomT56sateurdzd3VWlSpVU27Zt1XvvvafS0tJyfTZKacO+hw4dan794IMPqrJly+Y6m/cTTzyhypQpoy5evKiUUmrBggWqZs2aytXVVfeZS0hIUD169FDly5dXgG54uTXlzmm2ZdNzzPr/xd6f47CwMNWjR49sx3bo0CHbUPlLly6pUaNGqapVqyp3d3cVEhKihgwZYn5GmXXv3j3X2dTnzZtn09IdL7zwgqpdu7YuLbdnp5T2Hrq6upqnfzBNMWBpc8ZM35cvX1ZDhw5V/v7+ytPTU3Xo0MHid0BYWFi28i1atEg1a9ZMeXl5qfLly6vOnTurjRs3WrzOX3/9pfr06aNCQ0NVmTJlVFhYmBo/fny2z7/RaFSVK1fONiN4cWRQyk49S4UQogT54osvGDlyJCdPnsw2ZYRwjEceeYT9+/fzzz//WNzfokULOnbsyIcffmjV+Y4fP079+vVZu3atucZE2Nf333/PgAEDOHbsmE19q4oi6UMkhBAWDBw4kGrVqlnsQC3s79y5c6xZs4ZBgwZZ3L9u3TqOHj3K5MmTrT5nzZo1GTp0KDNmzLBXMUUW77zzDqNGjSr2wRCA1BAJIYRwmvj4eLZv387ChQv57bffOHbsGMHBwc4uliiFpIZICCGE02zevJlBgwYRHx/P559/LsGQcBqpIRJCCCFEqSc1REIIIYQo9SQgEkIIIUSpJxMzWiEjI4OzZ89Svnz5Yr94nRBCCFFaKKW4evUqVapUybaeZ1YSEFnh7NmzdlkfSwghhBCF79SpU4SEhOSaRwIiK5hWNT516pRd1skSQgghhOOlpKQQGhpq/h7PjQREVjA1k/n4+EhAJIQQQhQz1nR3kU7VQgghhCj1nB4QnTlzhscffxx/f3/KlStHkyZN+P333837lVK89tprVK5cmXLlyhEREcHRo0d157h8+TIDBw7Ex8cHPz8/hg4dqlvFGeDPP/+kffv2lC1bltDQULuuii6EEEKI4s2pAdGVK1e49957KVOmDGvXruXQoUO8//77VKhQwZxn5syZfPTRR3z66afs
2rULLy8vIiMjuXXrljnPwIEDOXjwILGxsaxevZotW7YwbNgw8/6UlBS6dOlCWFgYu3fv5t1332Xq1KnMnz+/UO9XCCGEEEWTU2eqnjRpEtu3b2fr1q0W9yulqFKlCuPGjWP8+PEAJCcnExQUxOLFi+nXrx+HDx+mYcOG/Pbbb7Ru3RrQFgHs3r07p0+fpkqVKsybN4+XX36ZhIQE3N3dzdf+/vvv+euvv/IsZ0pKCr6+viQnJ0sfIiGEEKKYsOX726k1RKtWraJ169Y8+uijBAYG0qJFCxYsWGDeHx8fT0JCAhEREeY0X19f2rRpw86dOwHYuXMnfn5+5mAIICIiAhcXF3bt2mXOc99995mDIYDIyEiOHDnClStXspUrNTWVlJQU3SaEEEKIksupAdHx48eZN28ederUYf369QwfPpwXXniBzz//HICEhAQAgoKCdMcFBQWZ9yUkJBAYGKjb7+bmRsWKFXV5LJ0j8zUymz59Or6+vuZN5iASQgghSjanBkQZGRm0bNmSt99+mxYtWjBs2DCeeeYZPv30U2cWi8mTJ5OcnGzeTp065dTyCCGEEMKxnBoQVa5cmYYNG+rSGjRowMmTJwEIDg4GIDExUZcnMTHRvC84OJjz58/r9qenp3P58mVdHkvnyHyNzDw8PMxzDsncQ0IIIUTJ59SA6N577+XIkSO6tL///puwsDAAatSoQXBwMBs2bDDvT0lJYdeuXYSHhwMQHh5OUlISu3fvNufZuHEjGRkZtGnTxpxny5Yt3L5925wnNjaWevXq6Ua0CSGEEKJ0cmpANGbMGH755Rfefvtt/vnnH5YuXcr8+fMZOXIkoM0sGR0dzZtvvsmqVavYv38/gwcPpkqVKvTq1QvQapS6du3KM888w6+//sr27dsZNWoU/fr1o0qVKgAMGDAAd3d3hg4dysGDB/n666+ZPXs2Y8eOddatCyGEEKWW0QibNsFXX2k/jUZnlwhQTvbDDz+oxo0bKw8PD1W/fn01f/583f6MjAz16quvqqCgIOXh4aE6d+6sjhw5ostz6dIl1b9/f+Xt7a18fHzUk08+qa5evarLs2/fPtWuXTvl4eGhqlatqmbMmGF1GZOTkxWgkpOT83+jQgghhFArVigVEqIU3NkqVVIqOlqpuDil0tPtdy1bvr+dOg9RcSHzEAkhhBAFFxMDffpoYVBOQkJg9myIiir49YrNPERCCCGEKB2MRhg9OvdgCOD0aS1oiokpnHKZSEAkhBBCCIfbulULdqyhFERHF27fIgmIhBBCCOFw587Zlv/UKS2IKiwSEAkhhBDC4SpXtv2YM2fsX46cSEAkhBBCCIdr3x4qVbLtmAsXHFMWSyQgEkIIIYTDubrCfffZdkxAgGPKYokEREIIIYQoFEeP2pa/alXHlMMSCYiEEEII4XBGIxw4YH3+SpW0ZrbCIgGREEIIIRxuw4a85yDK7OOPtWa2wiIBkRBCCCEcbtEi2/IHBTmmHDmRgEgIIYQQDvfLL7blt3XeooKSgEgIIYQQDufublv+/MxbVBASEAkhhBDC4cLCrM/r7l64HapBAiIhhBBCOJjRaFuTWd++hduhGiQgEkIIIYSDbdoEV69an3/QIIcVJUcSEAkhhBDCoTZutC3/xYuOKUduJCASQgghhEOdPGlb/sLuUA0SEAkhhBDCwapVsz5vSEjhd6gGCYiEEEII4WAdO1qf95lnCr9DNUhAJIQQQggHc8kl2hjNLBQG3uIlAOrUKaRCZSEBkRBCCCEc6vz57Gke3EJhYBZjABjIEsA5/YdAAiIhhBBCOFjWIKcL67lFOV3afWwhIMA5/YdAAiIhhBBCOFj79uDvD6DYxr2sp6t53yoexIDiJGHMneuc/kMAbs65rBBCCCFKk9rGv7hIA11aO7aynXYAeHvDI484o2QaqSESQgghhEOd7TeWX5LuBEMXqEQZ0szBEMC1a7B1qzNKp5EaIiGEEEI4xpUrULEioZmSnuQ/LOZJ
i9nPnSucYlkiAZEQQggh7O/zz+GJJ3RJFbnEFSrmeIizRpiBNJkJIYQQwp7S0yE4WBcMZTz/AqEhiiSD5WDIYIDQUOeNMAMJiIQQQghhLzt3QpkykJh4J+3QIVw+ms3s2dpLg0F/iOn1rFnOG2EGEhAJIYQQwh5694a2be+8btMGMjKggdaZOioKvv0WqlbVHxYSoqVHRRViWS2QPkRCCCGEyL9Tp7Kv3vrjj9CtW7asUVHw8MPaaLJz57Q+Q+3bO7dmyEQCIiGEEELkz/Tp8NJL+rQbN6BcOcv50YIfWxZ7LSwSEAkhhBDCNtevazMpZvbuuzB+vHPKYwcSEAkhhBDCeitXQq9e+rQzZ6BKFacUx16kU7UQQggh8paRAc2b64Ohfv1AqWIfDIHUEAkhhBAiLwcOQJMm+rRff4W77nJOeRxAaoiEEEIIkbPhw/XBUPXq2uSLJSgYAqkhEkIIIYQlFy9CQIA+belS6N/fOeVxMKkhEkIIIYTe/PnZg6GkpBIbDIEEREIIIYQwSUsDHx949tk7aZMnax2nfX2dV65CIE1mQgghhIBNm6BTJ33a0aNQu7ZTilPYpIZICCGEKM2Ugq5d9cHQ/fdrw+xLSTAEUkMkhBBClF7Hj0OtWvq0n3+Gzp2dUx4nkhoiIYQQojR69VV9MFS2LNy6VSqDIZAaIiGEEKJ0uXpV6zid2ccfw8iRzilPESEBkRBCCFFafPMN9O2rT0tIgKAg55SnCJEmMyGEEKKkMxq1DtKZg6GhQ7UO1RIMAU4OiKZOnYrBYNBt9evXN++/desWI0eOxN/fH29vb3r37k1iYqLuHCdPnqRHjx54enoSGBjIhAkTSE9P1+XZtGkTLVu2xMPDg9q1a7N48eLCuD0hhBDC+f74A9zc4NixO2l79sDChc4rUxHk9BqiRo0ace7cOfO2bds2874xY8bwww8/sHz5cjZv3szZs2eJiooy7zcajfTo0YO0tDR27NjB559/zuLFi3nttdfMeeLj4+nRowedOnVi7969REdH8/TTT7N+/fpCvU8hhBCi0A0ZAq1a3XndqJFWW9S8udOKVFQZlFLKWRefOnUq33//PXv37s22Lzk5mYCAAJYuXUqfPn0A+Ouvv2jQoAE7d+7knnvuYe3atfTs2ZOzZ88S9L8qv08//ZQXX3yRCxcu4O7uzosvvsiaNWs4cOCA+dz9+vUjKSmJdevWWVXOlJQUfH19SU5OxidrRzQhhBCiqElIgMqV9WkrVkCmSoXSwJbvb6fXEB09epQqVapQs2ZNBg4cyMmTJwHYvXs3t2/fJiIiwpy3fv36VKtWjZ07dwKwc+dOmjRpYg6GACIjI0lJSeHgwYPmPJnPYcpjOoclqamppKSk6DYhhBCiWJg9O3swdPVqqQuGbOXUgKhNmzYsXryYdevWMW/ePOLj42nfvj1Xr14lISEBd3d3/Pz8dMcEBQWRkJAAQEJCgi4YMu037cstT0pKCjdv3rRYrunTp+Pr62veQkND7XG7QgghhOPcugWurhAdfSdt2jSt47S3t9OKVVw4ddh9t27dzP9u2rQpbdq0ISwsjG+++YZy5co5rVyTJ09m7Nix5tcpKSkSFAkhhCi6fvoJIiP1aSdOQFiYU4pTHDm9ySwzPz8/6tatyz///ENwcDBpaWkkJSXp8iQmJhIcHAxAcHBwtlFnptd55fHx8ckx6PLw8MDHx0e3CSGEEEWOUtCunT4Y6tlTS5dgyCZFKiC6du0ax44do3LlyrRq1YoyZcqwYcMG8/4jR45w8uRJwsPDAQgPD2f//v2cP3/enCc2NhYfHx8aNmxozpP5HKY8pnMIIYQQxdLff4OLC2zffidtyxb44QfnlakYc2pANH78eDZv3syJEyfYsWMHjzzyCK6urvTv3x9fX1+GDh3K2LFjiYuLY/fu3Tz55JOEh4dzzz33ANClSxcaNmzIoEGD2LdvH+vXr+eVV15h5MiReHh4APDcc89x/PhxJk6c
yF9//cXcuXP55ptvGDNmjDNvXQghhMi/8eOhXr07r/39IS0N2rd3XpmKOaf2ITp9+jT9+/fn0qVLBAQE0K5dO3755RcCAgIA+PDDD3FxcaF3796kpqYSGRnJ3Llzzce7urqyevVqhg8fTnh4OF5eXgwZMoTXX3/dnKdGjRqsWbOGMWPGMHv2bEJCQli4cCGRWdtahRBCiKLuyhWoWFGftnChNuu0KBCnzkNUXMg8REIIIZzuiy9g8GB92sWLWu2QsKhYzUMkhBBCiFykp0OVKvpg6PnntY7TEgzZjax2L4QQQhRVO3dC27b6tIMH4X8Dh4T9SA2REEIIURT16aMPhu66CzIyJBhyEKkhEkIIIYqSU6egWjV92urV0KOHc8pTSkgNkRBCCFFUzJiRPRi6cUOCoUIgNURCCCGEs924AV5e+rSZM2HCBOeUpxSSgEgIIYRwph9+gIce0qedPg1VqzqnPKWUNJkJIYQQzqAUtGihD4b69tXSJRgqdFJDJIQQQhS2AwegSRN92q5dcPfdzimPkBoiIYQQolCNHKkPhkJDtckXJRhyKqkhEkIIIQrDxYvwv7U6zZYsgQEDnFMeoSM1REIIIYSjLViQPRi6ckWCoSJEAiIhhBDCUdLSwMcHhg27k/bii1rHaT8/pxVLZCdNZkIIIYQjbN4MHTvq0/7+G+rUcUpxRO6khkgIIYSwJ6WgWzd9MNSxo7YOmQRDRZbUEAkhhBD2Eh8PNWvq02JjISLCOeURVpMaIiGEEMIepkzRB0Pu7nDrlgRDxYTUEAkhhBAFcfWq1nE6szlzYNQo55RH5IsEREIIIUR+ffstPPqoPi0hAYKCnFMekW/SZCaEEELYKiMD6tbVB0NPPaV1qJZgqFiSGiIhhBDCFnv2QMuW2dOaN3dKcYR9SA2REEIIYa0nntAHQ/Xrg9EowVAJIDVEQgghRF4SEqByZX3at99C797OKY+wO6khEkIIIXLz0UfZg6GrVyUYKmEkIBJCCCEsuXUL3Nxg9Og7aVOnah2nvb2dVizhGNJkJoQQQmT1008QGalPi4+H6tWdUhzheFJDJIQQQpgoBffdpw+GevTQ0iUYKtGkhkgIIYQAbSX6evX0aVu2QPv2zimPKFRSQySEEEJMnKgPhipUgLQ0CYZKEakhEkIIUewZjbB1K5w7pw0Ia98eXF2tODApSQt+Mlu4EIYOdUQxRREmAZEQQohiLSZGGwh2+vSdtJAQePJJrR/0tWvQrh08/7y2AL3Zl1/CoEH6k128CP7+hVJuUbQYlFLK2YUo6lJSUvD19SU5ORmfrCsaCyGEcAqjEd54A6ZNs/6YTp1g3ep03OtWhzNn7uwYORI+/tjuZRTOZcv3t9QQCSGEKHZiYuCZZ+DyZduOuxH3C+5e4frEAwegUSP7FU4US9KpWgghRLESE6NNEm1rMPQ1j/ELd4KhU8GttVXrJRgSSA2REEKIYsRohBdesO2YqpzmNKG6tJ78wLoLPblxO0u/IlFqSQ2REEKIYmPgQH3Xn7xM5J1swZAn11lDT4xGmDvXzgUUxZbUEAkhhCgWJk6Er7+2Lm85bnADL/3xvMO7TNSlHTtmr9KJ4k4CIiGEEEVeWhp88IF1eXuwmtU8qEsL4RRnCMmW99o1e5ROlATSZCaEEKLImztX6z+UO8VvtNYFQ1/zGAaUxWAIIDbWmvOK0kACIiGEEEXe0aO572/IQRQutGa3Oa0Nv9CP3NvYzpzRZrgWQgIiIYQQRd7x4znvm8MoDtLY/PoUIbhxm19pY9W5z50raOlESSB9iIQQQhRpRiPExWVP9+ciFwnQpT3OFyzhcZvOX7lyQUonSgqpIRJCCFGkbdwIqan6tKEszBYM+XHF5mAoNFQWtBcaqSESQghRaPKzKv0XX9z5txu3OU8gFUgyp81kAi8yM1/l6dcv7+uL0kFqiIQQQhSKmBioXl1bYHXAAO1n9epaem6uXtV+tmMrt3HXBUN1OZLvYAhg2TIZ
ZSY0EhAJIYRwuOXLtfXHTp/Wp585A3365B4UtW0Lq+nBVu4zp22iAwYyOErdApXr1CkZZSY0RSYgmjFjBgaDgejoaHParVu3GDlyJP7+/nh7e9O7d28SExN1x508eZIePXrg6elJYGAgEyZMID09XZdn06ZNtGzZEg8PD2rXrs3ixYsL4Y6EEEIAfPst9O9veZ9S2s/o6Bxqak6cYMJEAz340Zz0AD/RiU2AwS7lk1FmAopIQPTbb7/x2Wef0bRpU136mDFj+OGHH1i+fDmbN2/m7NmzREVFmfcbjUZ69OhBWloaO3bs4PPPP2fx4sW89tpr5jzx8fH06NGDTp06sXfvXqKjo3n66adZv359od2fEEKUVjEx8OijuTdLKZVDTc20aVCjhvllGmXw4BY/84BdyyijzAQAysmuXr2q6tSpo2JjY1WHDh3U6NGjlVJKJSUlqTJlyqjly5eb8x4+fFgBaufOnUoppX788Ufl4uKiEhISzHnmzZunfHx8VGpqqlJKqYkTJ6pGjRrprtm3b18VGRlpdRmTk5MVoJKTk/N7m0IIUeqkpioVEKCUFvLkvS1d+r8DU1Ky7RzFR1afx9rNYFAqNFSp9HSnPibhQLZ8fzu9hmjkyJH06NGDiIgIXfru3bu5ffu2Lr1+/fpUq1aNnTt3ArBz506aNGlCUFCQOU9kZCQpKSkcPHjQnCfruSMjI83nsCQ1NZWUlBTdJoQQwnoxMVClCly4YP0xlSujta/5+OjSP3k1gY953q7lM/yvtW3WLBllJjRODYiWLVvGH3/8wfTp07PtS0hIwN3dHT8/P116UFAQCQkJ5jyZgyHTftO+3PKkpKRw8+ZNi+WaPn06vr6+5i00NDRf9yeEEKVRTIzWgfrSJeuP8fPJoMNz9bX2NZMnngCl+Hl/UI7HWWPwYAjJspRZSIgWe2XqhSFKOafNQ3Tq1ClGjx5NbGwsZcuWdVYxLJo8eTJjx441v05JSZGgSAghrGA0wrBhth3TnD3sSWkJmSvj//gDWrQAwMsr/+VxcYEFC7RaIFvnPxKli9NqiHbv3s358+dp2bIlbm5uuLm5sXnzZj766CPc3NwICgoiLS2NpKQk3XGJiYkEBwcDEBwcnG3Umel1Xnl8fHwoV66cxbJ5eHjg4+Oj24QQQuRt0ybbaob+j6fYQ8s7CfXqaVHV/4IhgGbN8l+ecePA3V0Lfjp21Ea7dewowZDIzmkBUefOndm/fz979+41b61bt2bgwIHmf5cpU4YNGzaYjzly5AgnT54kPDwcgPDwcPbv38/58+fNeWJjY/Hx8aFhw4bmPJnPYcpjOocQQgj7yfLrNkeBJKIw8BSL7iR+8w389ZdWrZNJlSq2l8PFBSZMgJn5n7NRlDJOazIrX748jRs31qV5eXnh7+9vTh86dChjx46lYsWK+Pj48PzzzxMeHs4999wDQJcuXWjYsCGDBg1i5syZJCQk8MorrzBy5Eg8PDwAeO655/j444+ZOHEiTz31FBs3buSbb75hzZo1hXvDQghRwsXEaJ2U8zKKOczhBX1iSgqUL28xf9WqtpXjiSfgs8+0miEhrFWk1zL78MMPcXFxoXfv3qSmphIZGcncuXPN+11dXVm9ejXDhw8nPDwcLy8vhgwZwuuvv27OU6NGDdasWcOYMWOYPXs2ISEhLFy4kMjISGfckhBClEimjtS5cSeVa3hThjuT507jNTrETaOj5VgI0Pr7hIRkn+XaEoNBq6WSJjFhK4NSpnlCRU5SUlLw9fUlOTlZ+hMJIUQWRiMEBeXed6gzP2ebULEGxzGG1iA+Pu8AJiZGW+LD2m+suDitr5Ao3Wz5/nb6PERCCCGKt9w7Uis20UEXDK2hOwYUJ6hh9TxAUVHaMPmKFa0rkyzHIWxVpJvMhBBCFH2bNllOr83RbIuv3sdmtnIfrq5aH2pb5gGKigJfX8gy165FshyHsJXUEAkhhCiQjIzsaTN4URcMJeGLO6nmFevffjt/
kyJ27Kj1JzLksK6rwQChoVq/IyFsIQGREEKIAvH3v/NvX5JQGHiRO+Pdn2E+FUjiNneGfUVH5+9arq4we7b276xBkSzHIQpCAiIhhBAFcvKk9nMAS0iigm5fJS6wkGd0aabJEvPL1J8o63B8WY5DFISMMrOCjDITQgjLjEaoEmTk90vVCeXOuPhPGMEoPsmW/8EHYdUq+11bluMQubHl+1s6VQshhMi3vfN/JfFSG11aY/ZzkMbZ8vbsab9gCO4sxyGEPUhAJIQQpYzdalb69qXVN9+YX+6mJa35Hcje47llS/jhh/yXWQhHk4BICCFKkZgYGD1aP+tzpUowdy48+qiVJzlzRuuwk8mDrGI1D+Z4yPvv56OwQhQi6VQthBClhGl5jaxLYFy8CI89BhMnWnGSd9/NFgyVd7meazDk6gpt2+ajwEIUIgmIhBCiFDAaYdiw3PO8+642SsuiGze0ce2Zo6YZM9gUp7iW4ZnntXfssK28QhQ2CYiEEKIUyH15jTtGjNACGJ01a8DLS5928iS8+CKnTll3/X//tS6fEM4iAZEQQpQCGzdal+/CBa3DNaCtpNq6tTY8zMS0wmpoKAC7dll33pEjtSY7IYoq6VQthBAlRG6jx7Zts/48K1dCx8BD0KiRfsfOnXDPPboka2eyu35di6UcNXGizEkkCkoCIiGEKAEsjR4LCYEPP4QKFWDLFuvPVXPW8zDr4zsJVapobV5u2b8y6tSxrZzR0fDww/YNVnK699mzZdZqYT27zFSdlJSEn5+fHYpTNMlM1UKIoiwm5k5LVkFU5BKXqKRP/O9/YdCgHI9JSwNPTwv9jnIRF2e/CRVzunfTumaylEfpZsv3t819iN555x2+/vpr8+vHHnsMf39/qlatyr59+2wvrRBCiHwzGrXakYIGQ0/xf9mCoW0/XMk1GAJtTbKxY2271rlztpbOstzu3ZQWHW1bsCZKL5sDok8//ZTQ/3Wmi42NJTY2lrVr19KtWzcmTJhg9wIKIYTI2dat2ecVsoUbt7lERf6Pp81p7zIeA4pTV/2sOsfMmTBhArhY+Y1y9Gg+CmpBXveuFJw6lamTuBC5sDkgSkhIMAdEq1ev5rHHHqNLly5MnDiR3377ze4FFEIIkbOC1La0Yyu3caciV8xpdTnCRN4FtM7J1po5E65dA2t6FSxYYJ9aG2vv3V41UqJkszkgqlChAqf+N/HEunXriIiIAEAphVHqJYUQolAdOZK/436gJ1u5z/x6C+0xkMFR6mIwaKPq27e37ZzlysG4cXnnO33aPrU21gZstgR2ovSyeZRZVFQUAwYMoE6dOly6dIlu3boBsGfPHmrXrm33AgohhMjOaISBAyFTl06rhHGCE9TQpXVhPbF00aXNmpW/kWDWjjqzR61N+/baaLIzZyz3IzIYtP22BnaidLK5hujDDz9k1KhRNGzYkNjYWLy9vQE4d+4cI0aMsHsBhRBC6MXEQFCQ7cHQq7yuC4aMuFCWm9mCoa+/zv/IrMKstXF11YbWw51RZSam1/kN7ETpY/Ow+y1bttC2bVvcssxHkZ6ezo4dO7jvvvtyOLL4kmH3QoiiwrRAqy28uMY1yuvSRjOLjxhtMX9BhsUbjVC9et61NvHx9gtULM1DFBqqBUMy5L50s+X72+aAyNXVlXPnzhEYGKhLv3TpEoGBgSWyH5EEREKIosAUbNgyquwRYohBH0EFc45EgnM8ZulS6N8/n4XkztxAoA+KHDk3kMxULSxx6DxESikMWesm0QIir6yL/wkhhLAbW4bYG8jgIA11wdDnDMaAyjUYgoI3Z0VFaUFP1ar69JAQx02U6Oqq1Wr176/9lGBI2MrqTtVR//sEGwwGnnjiCTw8PMz7jEYjf/75J23btrV/CYUQQgDw3nvW5WvGXvbSQpfWkt3soWWex7q52acTclSUtkSHI2ptpDZIOILVAZGvry+g1RCVL1+ecuXKmfe5u7tzzz338Mwzz9i/hEIIIZg4
EdasyTvfQoYylP+YX/9NHRpwmAysixiqVbNfcGGqtbEnWbdMOIrVAdGiRYsAqF69OuPHj5fmMSGEKCRpafD++7nnCeA85wnSpT3KN3zLozZdq0uXvPM4S07rlp05o6XLumWiIOyyuGtJJ52qhRAFVZBmnvvv10Z+5WQEn/AJo3Rp5UnJNrLMGjduaBMsFjV5dSh3xOg1Ufw5tFN1YmIigwYNokqVKri5ueHq6qrbhBBC3GE0wrRp4OcHnTrBgAHaz7AwrcYjLxMn5hwMuZPKLTx0wdDrvIoBla9g6N57i2YwBLJumXA8m2eqfuKJJzh58iSvvvoqlStXtjjiTAghhBbwDBmirfGV1Zkz2nxCK1bk3MyTlpZzR+r72cAGInRpNTlGPDXzXd6NG/N9qMPJumXC0WwOiLZt28bWrVtp3ry5A4ojhBAlg7UTKA4bpo3GslTB/uyzliY3VGygM/dzp9poLV3pzo9Awf5A3bHD/p2g7UXWLROOZnOTWWhoKNLtSAghcmY0aiOhrHHpEmzaZPkcy5fr02pzFIWLLhjqwCa6s5aCBkNQtGtXTOuW5dQokd8FaYUwsTkgmjVrFpMmTeLEiRMOKI4QQhR/tkygCJabqrZuhevX77x+m8kcpa75dTI+uJPKFjoUoKR6Rbl2RdYtE45mc5NZ3759uXHjBrVq1cLT05MyZcro9l++fNluhRNCiOLo1KmC5zfV1viQTDJ+un3D+IwFDMtf4XJQHFaFN82AbWkeIlm3TBSUzQHRrFmzHFAMIYQoOWbOtC1/errWRJa5dqNyZejPUpYyUJe3Ehe4RCU7lFJv9uziUbviyBmwRekm8xBZQeYhEkJYKy0NMq1sZDV/f5g//3+1HEYjqmZNDCdPmvfP4zlGMM9+Bc0kOho+/NAhpxbCqRw6DxHAsWPHeOWVV+jfvz/nz58HYO3atRw8eDA/pxNCiBJj7tz8HXfpkjYqLe6dX8HNTRcMNWa/w4IhgJ49HXZqIYoNmwOizZs306RJE3bt2kVMTAzX/jfBxr59+5gyZYrdCyiEEMXJsWP5P/Yr+tFpUhvz6z00xwUjB2lsh5IJIXJjc0A0adIk3nzzTWJjY3F3dzen33///fzyyy92LZwQQhQ3tWrZfkwVzqAw0I+vzWn731xJS/ag8leRb5OEBIdfQogiz+b/afv37+eRRx7Jlh4YGMjFixftUighhCiuRoywLf843uMMIbq0iSOu0XDSQ1SpYseC5eLChcK5jhBFmc2jzPz8/Dh37hw1atTQpe/Zs4eqVavarWBCCFEcubpqm9GYe76y3OQmnrq0ybzNDCZjmAf/XoQrVxxY0EwCAgrnOkIUZTbXEPXr148XX3yRhIQEDAYDGRkZbN++nfHjxzN48GBHlFEIIYqNrVvzDoa68WO2YCiUk8xgMqAt1/HNN3DzpqNKqVeQfk9ClBQ2B0Rvv/029evXJzQ0lGvXrtGwYUPuu+8+2rZtyyuvvOKIMgohRLGR+/IXil3czY/0MKfE8AgGFKcJdXjZcjJ1qrb2mhClWb7nITp16hT79+/n2rVrtGjRgjp16ti7bEWGzEMkhLDWpk3QqVP29AYc4hCNdGnh7OAXwgunYLkwGLTZnuPji+8Eh0ajTNYosnP4PESgLfLavXt3evfuzfXr17lSWI3dQgjhIEajFtB89ZX2M6+mL0tMi5BmNovRumDoLJVx43ahBEMGA/Ttm3sepbTlQ7ZudXhxHCImBqpX1wLRAQO0n9WrS62XsI3NAVF0dDT/93//B4DRaKRDhw60bNmS0NBQNllaslkIIYoBe32pZl6EtAKXURgYzUfm/YP5nKqcxWj7mJZcTZuWPRALDdXW/nr4YevOUZRXu89JTAz06ZN9Md0zZ7R0CYqEtWwOiL799luaNWsGwA8//MDx48f566+/GDNmDC+//LJN55o3bx5NmzbFx8cHHx8fwsPDWbt2rXn/rVu3GDlyJP7+/nh7e9O7d28SExN15zh5
8iQ9evTA09OTwMBAJkyYQHp6ui7Ppk2baNmyJR4eHtSuXZvFixfbettCiBIsJkabJTrrl+rp0/n7Uo2Kgt9H/IfL+OvSK3CZL7D/4JPQUHj5ZThxAuLiYOlS7Wd8vFYWa1exL8qr3VtiNGoLvVrq+GFKi47OX02fKIWUjTw8PNSpU6eUUko988wzavTo0UoppY4fP67Kly9v07lWrVql1qxZo/7++2915MgR9dJLL6kyZcqoAwcOKKWUeu6551RoaKjasGGD+v3339U999yj2rZtaz4+PT1dNW7cWEVERKg9e/aoH3/8UVWqVElNnjzZnOf48ePK09NTjR07Vh06dEjNmTNHubq6qnXr1lldzuTkZAWo5ORkm+5PCFH0pacr5eWllPYVankLDdXyWSUtTamKFXUneI+xuZ6/oNuKFXnfY0iIUgaD5eMNBhvvsYiIi7Pu+cTFObukwlls+f62OSCqVq2aWr9+vUpPT1ehoaFq9erVSimlDhw4oPz8/GwvbRYVKlRQCxcuVElJSapMmTJq+fLl5n2HDx9WgNq5c6dSSqkff/xRubi4qISEBHOeefPmKR8fH5WamqqUUmrixImqUaNGumv07dtXRUZGWl0mCYiEKLnuvdeOX6rbtmU7sC5/OTQYmjLFuvv85pucgyGDIe+gqihautS6Z7R0qbNLKpzFlu9vm5vMnnzySR577DEaN26MwWAgIiICgF27dlG/fv1811QZjUaWLVvG9evXCQ8PZ/fu3dy+fdt8foD69etTrVo1du7cCcDOnTtp0qQJQUFB5jyRkZGkpKSYF5rduXOn7hymPKZzWJKamkpKSopuE0KUPN98A9u3W5f3zJk8Mjz0ELRrd+f1vfdivJ3BybL18l0+a8yZk3eTXkwMjB1reV9IiNbPKCrK/mVztJLaFCicw+ZefVOnTqVx48acOnWKRx99FA8PDwBcXV2ZNGmSzQXYv38/4eHh3Lp1C29vb7777jsaNmzI3r17cXd3x8/PT5c/KCiIhP8tvJOQkKALhkz7Tftyy5OSksLNmzcpV65ctjJNnz6dadOm2XwvQojiw2iEoUOtz5/T8hbG4//iWqu6Lm3fzPUcCulC4GZtlJcjXb6s9XPKKagxdTrOaYKVDz4onsEQ3BnRd+aM5fszTSfQvn3hl00UP/ka5tCnTx9A6/RsMmTIkHwVoF69euzdu5fk5GS+/fZbhgwZwubNm/N1LnuZPHkyYzP9OZWSkkJoqPMmTRNC2N/WrXDtmvX5LS1vcaj/GzRc9pr5dQYGPLlB6sSydiihbaKjoWdP2LHjzlw8bdvm3OkYtIBh7Fh45JHiOWePaURfnz7avWS+T1MgOmtW8bw3UfhsbjIzGo288cYbVK1aFW9vb44fPw7Aq6++ah6Obwt3d3dq165Nq1atmD59Os2aNWP27NkEBweTlpZGUlKSLn9iYiLBwcEABAcHZxt1ZnqdVx4fHx+LtUMAHh4e5pFvpk0IUbL8+69t+XVLNV67BgaDLhiK5kNcySCVwg+GTPMIVa2qnzagatXsI+csHVdc5x8CrXbr22+zvD8U76ZA4Rw2B0RvvfUWixcvZubMmbi7u5vTGzduzMKFCwtcoIyMDFJTU2nVqhVlypRhw4YN5n1Hjhzh5MmThIdrk5mFh4ezf/9+zp8/b84TGxuLj48PDRs2NOfJfA5THtM5hBCl04oVtuU3N7t89x2UL6/bV5mzzCbaLuUqiIsXc3+dk+I4/1BmUVE5TzkghNVs7bFdq1Yt9fPPPyullPL29lbHjh1TSmkjwGwdZTZp0iS1efNmFR8fr/788081adIkZTAY1E8//aSU0obdV6tWTW3cuFH9/vvvKjw8XIWHh5uPNw2779Kli9q7d69at26dCggIsDjsfsKECerw4cPqk08+kWH3QghVu7b1I7k8PZVSRqNSDRvqdnzOIIeOICusTYali5LKlu9vm/sQnTlzhtq1a2dLz8jI4Pbt2zad6/z58wwe
PJhz587h6+tL06ZNWb9+PQ888AAAH374IS4uLvTu3ZvU1FQiIyOZO3eu+XhXV1dWr17N8OHDCQ8Px8vLiyFDhvD666+b89SoUYM1a9YwZswYZs+eTUhICAsXLiQyMtLWWxdClCCpqdbnnRi5D1yb69Ja8Tt/0Mq+hSpk0ulYiDtsXty1VatWjBkzhscff5zy5cuzb98+atasyeuvv05sbCxbi3NjdA5kcVchSp7gYMjSvdCiBTzN09zpH/kPtajHETIo3j11TZ2OpZ+NKMls+f62uYbotddeY8iQIZw5c4aMjAxiYmI4cuQI//3vf1m9enW+Cy2EEIUlJibvYCiA85xHP2XHY3zNch5zYMnyVr48XL1653VAQM5TAmSWNV9IiDYCS4IhITQ21xABbN26lddff519+/Zx7do1WrZsyWuvvUaXLl0cUUankxoiIUoOo1FbtDW30VfDmctcRurSAj2SuZDq/P//3t766QKqVoVbt+DSpZyPCQ2Ff/7RD8lv316Go4uSz2E1ROnp6bz99ts89dRTxMbGFqiQQgjhaEajNqQ8cxCwdWvOwZA7qSTjS1nudDB6k5d5lTfBhj5HjpR17qSzZ3OeZ8ikXz9wd4eOHR1WLKew9P5KkCfyy6Zh925ubsycOTPbavJCCOEMRiNs2gRffaX9zLyqeUyMVjOSeV6e0FBYudLyuTqxkVTK6oKhmhzTgqEizJo6/mXLSt6K7zExWk1f5ve3evW8lzERIic2z0PUuXNnp88kLYQQMTEQFqb/QgwL09K//hp6984+v865c1q/GT3FBu5nI53NKevpgoEM4qnp6NsoFMV98sWsTMuRZK3pO3NGS5egSOSHzZ2qu3XrxqRJk9i/fz+tWrXCy8tLt/+hhx6yW+GEEMKSmBgt4MnqzBnL6TmpyTGOoZ9GpCNxbKZjwQpoZ1mXpciP4j75oonRmPNyJEppzyo6Gh5+WJrPhG1s7lTt4pJzpZLBYMBY0uplkU7VQhQlRiMEBeXeidgab/ESLzHd/Poq3vhzidu453KUc0ydqm0FERdXMvoQbdqk1QbmpaTcrygYW76/bW4yy8jIyHEricGQEKJo2bSpYMGQD8koDLpg6Dnm4cNVi8FQZKRjaxoqVoRp02DcuOzXcXWFCRPglVe0YfKmuYNsYTBofadKyuSL1tZ0lZQaMVF4bA6IhBDCmT75JP/H9uMrkvHTpQVwns94LsdjunaFwMD8XzM3lSrBp59C48bwwQfZOz4bjfDee1pH8NmztbSsQVHm1zntK0krvleubN98QpjkKyDasGEDPXv2pFatWtSqVYuePXvy888/27tsQgihYzRqa6vaygUj8VTnKwaY0z7lWQwoLhKQ83EuMGIE1KuXn9Lm7dIleOwxGDYs9z5Cpj4xllZ1r1pVW6h2xYrSseJ7+/a515aVtBoxUXhsDojmzp1L165dKV++PKNHj2b06NH4+PjQvXt3PinIn25CCJGHadNsP6YVv2PEjer8a05rwp8M59M8jx03Tpu/p1Yt269rDVMQlFsToFL6UWJZAyfT69Ky4rura961ZSWpRkwUIltXjq1ataqaM2dOtvSPP/5YValSxdbTFQuy2r0QzpeerpSbm22ruC+hvy5hD82UAaNVx7q7a9dUSqmHHsrfKvJDhij1yiv2WZE+OlopgyF7usGgbStWOPXtKXQrVigVEqJ/FqGhpe85iNzZ8v1tcw1RUlISXbt2zZbepUsXkpOT7RCiCSFEdhs3grVzwlbmLAoDA/jKnPYw39OCvSgrK8bT0u7Uyty4YWtpNZGR0LBh/o7N6ssvcx5qDlqzWmka11JaasRE4bE5IHrooYf4zkIj/sqVK+nZs6ddCiWEEFnNn29dvrG8z1n0nWm8uMYqHrb5mqaRSq1b23wooHXsLWjnXoNBW5j14sWc82RtVistXF21ofX9+2s/pZlMFITNEzM2bNiQt956i02bNhEeHg7AL7/8wvbt2xk3bhwfffSR
Oe8LL7xgv5IKIUq1dety31+Wm9zEU5f2Em8xnZfyfU3T6LJOnWDGDOuPMxi0jr+mjr0hIdqkkbZOrmjqEzNwoKUZtrNz9lBzWVtMFGc2T8xYo0YN605sMHD8+PF8FaqokYkZhXA+V1fIyLC8rytrWUt3XVooJzlNaIGuWbUqfPSRtsJ8ZKT1xxkM+tFdpqUmwLagKDRUC4QqViz6kxHGxGgzSGdeTiMkROsALc1Ywlls+f62OSAqjSQgEsL5LAdEip2Ecw+7zCnf0YsoLI/N9/eHUaOsH61mqqGJitKGtVvDFMRkDQIsBQyWBATAhx9qwZiphsWa2bn9/SEx0Tk1MqaAL+u3ien5lbSh/6L4cOhM1UII4QweHvrX9TmMwkUXDLVlu8VgyNtbC4ISE+Gee6y/pukL/qefrMs/aFDOHXujomDx4rzPceGCFgwVlz4xea0tBqWvw7coniQgEkIUGUajtjTHV19pPzN/iYaE3Pn3h0RzmDvDtxIIwo3b7KStxfNev67NBu3qCkuW2FYmpeDqVevyDhmSexBz/rx158naF2jr1ryXK7l0ybpO1bk94/zYujX3Wq/S2uFbFD8SEAkhioTlyyE4WOsrM2CA9rN6da05BrQlLCpwGYWBaGabjxvCYiqTgDGPMSKmWopr1/JXPm/v3Pf7++fdfye/y07Ya/2umBjtmeb0jPND1hYTJYUEREIIp5s4UVvCIuvQ8tOntb4pMTHQ88IiLuOv21+By/yXIXmeP3MtRbt2+SvjhAm57//007ybuPK77IQ91u8y9fPJWptz5sydZ5wfsraYKCkkIBJCONW338K77+a831XdpsOjAbg8/ZQ57UOiMaBIooJN1zp3Dp5/XlujzFqmIOXll7WO1Zmb7jIbMybvoMLVVVvE1VJ/m9yWnSjo+l2O7Ocja4uJksLmgGjdunVs27bN/PqTTz6hefPmDBgwgCtXrti1cEKIks1ohKFDc97flu3cxh3/jDtVRz/NPsw03w/zdb3KlbW1ycaNsy5/1iAlKkoLaCyxpqYlJgbGjrW8L7eFWAu6fpcj+/nI2mKipLA5IJowYQIpKSkA7N+/n3HjxtG9e3fi4+MZm9P/dCGEsGDTJvjfr5NsvqMX27nTvnWhTjhkZNDlhfpcugQ//wyvvKJt69fbVksxc6bWBJZXTVHWIMVozDmgyaumJacmK5MPPsh9aHpUlOXV7q1Z0d7R/XwKUjYhigqb5yHy9vbmwIEDVK9enalTp3LgwAG+/fZb/vjjD7p3705CQoKjyuo0Mg+REI7x8svw9tv6tFBOcpIwXVpX1jLh56507pzzuXKa/DC3uXDS0mDuXDh2TFvR/tlnYdeunGda3rQpfxMkGo0QFqbVIllimtk6Pj7vmpT8zAad33LbSmaqFkWNLd/fNi/d4e7uzo3/rXT4888/M3jwYAAqVqxorjkSQghrnDypf/0Sb/EWr+jSynGDW5RjbB79W0y1FJZmS7Y0USJozWfR0fq03AKC/Na0vPVWzsEQ6Jus8gpITOt32cLUzyen5UOyLjWSX/kpmxBFhc0BUbt27Rg7diz33nsvv/76K19//TUAf//9NyE59TYUQggLTL8yPLnOdfTj2sfwAbMYY369dSt06ZL7+aKi4OGHHVdLkZ8RVTExMGWKdcc5ami6qZ9Pnz5a8GOpBk36+YjSzuY+RB9//DFubm58++23zJs3j6r/azReu3YtXbt2tXsBhRAl1+nT8DDfZwuGKnNWFwzZwpEroNs6oso0ustajhyaLv18hMidrGVmBelDJIT9vTghg8ffa0YTDpjTvmQgg/jSYv6ffybXPkSFJa+FWr/5Bh59VPu3tX13QAukrOlDVFDSz0eUJg7tQwRw7NgxFi1axLFjx5g9ezaBgYGsXbuWatWq0ahRo3wVWghResS+/yfvvNdMl9aa39hNa4v5y5YtOn1TcuqrZDJ27J0h+rY0gRVWk5X08xHCMpubzDZv3kyTJk3YtWsX
MTExXPvfPPj79u1jirUN5UKIUss4dBgPjL8TDB2jJq6k5xgMAbz4YtGqxYiK0laktyTzfETWNoFNmyZNVkI4m81NZuHh4Tz66KOMHTuW8uXLs2/fPmrWrMmvv/5KVFQUp3Ob/auYkiYzIQrOeO48rlWCdGl9WcY39M31OG9vSEoqWgGR0aitAZbbr7uQkDvD+XMa3WXKd+JE0bo/IUoKW76/ba4h2r9/P4888ki29MDAQC5mXYhICFEiFHSF9N3PfJotGPIhOc9gCODzz4tesJDXzM+g7Z8xI/dZnA0GbX9Ruz8hSiObAyI/Pz/OWWgY37Nnj3nEmRCi5CjQCulpaaS6edJq4XBz0lu8hAHFVfKubY2KKppNSdb2DTL1IpDRXUIUfTZ3qu7Xrx8vvvgiy5cvx2AwkJGRwfbt2xk/frx5kkYhRMlgGlGVtbnHtAp9rl/ocXFw//14ZEqqxT8cp5bV1x8xwuYiFwpbhsdHR2ujxxw5P5IQouBs7kOUlpbGyJEjWbx4MUajETc3N4xGIwMGDGDx4sW4lsD/4dKHSJRG1vST8feHxMQsX+xKaTMo/vyzOSmWCLrwE5DDBD4WFMW+QybWPJvMCrokhhAifxzah8jd3Z0FCxZw7NgxVq9ezZdffslff/3FF198USKDISFKK2v6yVy6pC1LYXbsmLZiaqZgqBMb6UIstgRDAOPHF81gCPQrvFvDUTNQCyHsJ1/zEAFUq1aNatWq2bMsQogiJLe1tzKbMQPuuQeqzn2ZRivvrNR6y9UTH+MVbuOer+svWABNmhTdPjZRUdpweWtmG3HkDNRCCPuwqsls7NixVp/wgw8+KFCBiiJpMhOlhWn19/XrYd06644pTwop+OrS9jwzlw9vDeeLL/JfltxWqS8q8mo6s2UVeyGE/dl9puo9e/ZYdWFDTgv8CCGKvIkT4d13bTumL8tYRn9dWiCJXFwYSO/eBSuPUlpAER2tdUguigFF5kVTQRZNFaI4k7XMrCA1RKKkMdUEmSYOjI+Hjz6y/ngXjBylDjWJN6fN5xmeZb4DSlv0OyXHxGRfyiM0VAuGimrtlhClgcPXMhNCFF8TJsAHH0BGRv6Ob8nubMtsNGUf+2lqh9JZVtQ7JUdFybB6IYo7CYiEKEW6dbO+b5AlX/A4j7PE/PpPmtCcvSjbB6zapDh0SpZFU4Uo3iQgEqKU8PSEmzfzd2ww5zhHFV1aL75jJb0KXrBcmDolt2/v0MuUaEaj1FwJYQ3H/lknhCgSDIb8B0PRfJgtGPLiWqEEQyCdkguiQMuuCFHKSEAkRAnn6Zm/4zy4hcLAh9yZduNl3sSA4gZediqdplOn7Gt9VagAU6dqfXOE7UzLrmSdEuDMGS1dgiIh9JwaEE2fPp277rqL8uXLExgYSK9evThy5Iguz61btxg5ciT+/v54e3vTu3dvEhMTdXlOnjxJjx498PT0JDAwkAkTJpCenq7Ls2nTJlq2bImHhwe1a9dm8eLFjr49IZwuISF/NUORrOMW5XRp1fiXt3nZTiXTe+YZ+PdfbaLDihW1tMuXtUkPpUbDdkajNurN0hhiU1p0tJZPCKFxakC0efNmRo4cyS+//EJsbCy3b9+mS5cuXL9+3ZxnzJgx/PDDDyxfvpzNmzdz9uxZojKNYzUajfTo0YO0tDR27NjB559/zuLFi3nttdfMeeLj4+nRowedOnVi7969REdH8/TTT7N+/fpCvV8hCluzZrYeodhBOOvoZk75nocxoDiF42amr1wZVq7UaoQuX9bvkxoN2+W17IpScOqUlk8I8T+qCDl//rwC1ObNm5VSSiUlJakyZcqo5cuXm/McPnxYAWrnzp1KKaV+/PFH5eLiohISEsx55s2bp3x8fFRqaqpSSqmJEyeqRo0a6a7Vt29fFRkZaVW5kpOTFaCSk5MLdH9CFDZXV6W0r7+8t3oczpbYlm1WH5/fLTRUqdRUpUJCcs5jMGj50tO1+0pPVyouTqmlS7Wf
pnShWbrUume/dKmzSyqEY9ny/V2k+hAlJycDUPF/dea7d+/m9u3bREREmPPUr1+fatWqsXPnTgB27txJkyZNCAoKMueJjIwkJSWFgwcPmvNkPocpj+kcWaWmppKSkqLbhCjqjEbYsAFefVXbNmywvknkA8bwFw3Mr88TgBu32cG9DirtHbNmwY4d1tdoSEfhvFk7TUFxmM5AiMJSZAKijIwMoqOjuffee2ncuDEACQkJuLu74+fnp8sbFBREQkKCOU/mYMi037QvtzwpKSnctNDBYvr06fj6+pq30NBQu9yjEI4SEwOVKkFEBLz5prZl+RvAIj+uoDAwhlnmtCdYRBDnMRbCrBxRUdpm7cSLK1dKR2FrtG+vTVeQ02pKBoM2k7ZMZyDEHUUmIBo5ciQHDhxg2bJlzi4KkydPJjk52bydOnXK2UUSIkcxMdC7NyQl2XbcYD7nChV1aRW5xOc8Ybey5aVhQ+2ntTUVX34pHYWtYVpjDbIHRTKdgRCWFYmAaNSoUaxevZq4uDhCQkLM6cHBwaSlpZGU5Td9YmIiwcHB5jxZR52ZXueVx8fHh3Ll9CNpADw8PPDx8dFtQhQlaWna8hsPPYTNi6i6kk4CQbrAZxajMaCyBUiO5uenBTDW1GgEBMDFizmfSzoK60VFwbffZp/OICRES5c11oTQc2pApJRi1KhRfPfdd2zcuJEaNWro9rdq1YoyZcqwYcMGc9qRI0c4efIk4eHhAISHh7N//37Onz9vzhMbG4uPjw8N//fnZ3h4uO4cpjymcwhRnEycCB4eMG4c/PCDbceGs4N0yhDEnf8vDTikazIrTOPHa/1/Vq7Mu0Zj4EDrzlnU1z0rTFFRcOKEtjju0qXaz/h4CYaEsMjxfbxzNnz4cOXr66s2bdqkzp07Z95u3LhhzvPcc8+patWqqY0bN6rff/9dhYeHq/DwcPP+9PR01bhxY9WlSxe1d+9etW7dOhUQEKAmT55sznP8+HHl6empJkyYoA4fPqw++eQT5erqqtatW2dVOWWUmSgqJkzI/2iuGHrpEnZwj4IMh40e8/S0Lp/BoG0rVmhb1tFmoaFaelycdeeLi3P2uySEKCps+f52akAEWNwWLVpkznPz5k01YsQIVaFCBeXp6akeeeQRde7cOd15Tpw4obp166bKlSunKlWqpMaNG6du376tyxMXF6eaN2+u3N3dVc2aNXXXyIsERKIoSE3N57B2/s2W2JUfHRYImbbRoy0HODkFRaZh9TkNqU9P185lMFg3NF8IIWz5/jYoZamLosgsJSUFX19fkpOTpT+RcJqyZSE11bZjJvN2ttmly3Ej2yzUjhAXp63+bjTCnDkwZoz1x+TEtBwF3OlIDXea1aRvjBAiM1u+v4tEp2ohRO4uXLAtGPLkOgqDLhgay/sYUIUSDAUE3BnS7eoKWWa9yFFe/X+ko7AQwlEcP9GIEMJmRqM2WurMGS0YmjLF+mMfYmW2leircCbbivWONHeufki3PScKjIrSFnzdulULoCpX1oIvGUIuhCgICYiEKGJiYrSFOXObudkSAxnspTlN2W9OW0p/BrLUziXM3YQJd5q1TEzD6s+c0Td1mRgM2n5rJwp0dc29aU0IIWwlAZEQRYhpkkVbNWY/+2mqS7uLX/mdu+xUsrz5+sKCBfDoo9n3mSYK7NNHC34s9f+RiQKFEM4kfYiEKCIuX85fMPQpz+qCoXiq40q6XYMhF5ecJ00Erc/Q+fOWgyET6f8jhCjKpIZIiCKgZk1twjxbVOICFwjUpfVnKcvob8eSaTIytJ851e58+im4u+d9npLU/8fUz6u434cQQiMBkRBO5uUFN27YdswwPuMzntOl+ZJECr52LJledLRWk5O5b1NIiNbUZUvtTkno/2Opn1dIiNYsKDVdQhRP0mQmhBN17WpbMFSGNK7irQuG3mYyBpRDgyHQanZkGYg7cyFl7fR+5oyWHhPjnHIJIQpGJma0gkzMKBxh2DCtE7K1
OrCJTXTSpdXmKMeoXeCyVKoEly7lPgIsPl6ahIxGbe21nEYAyrMSomiRiRmFKIJu3tSCoNBQ7YvT+mBIsZ4uumDoZzpjIMMuwVBoqDZvEOS8sKqMANNs3Zr7dAhKwalTWj4hRPEifYiEKAS9emkrutuqBsc5Ti1d2v1sII777VIug+FOHyBXV8v9YmztI1SS5TWTtq35hBBFhwREQjhYfoOh13mVV3nT/PoG5ajAFdLwyFc5KlbUhvabhIbqg52SNALMUew547YQomiRPkRWkD5EIr9u3gRPT9uOKU9Ktg7SI/iEeYwoUFl+/lkLbiTYyT9TH6K8ZtyWPkRCFA22fH9LDZEQDmRrMPQYX/M1/XRpgSRmm28oP1avhg8/zD2PI+bWKUnz9ciM20KUXNKpWgg7Mhph0yb4v//LfWbnrFww8g+1dMHQAp7GgLJLMATaF3VuQ8JjYrTaj06dYMAA7Wf16gUbRu6IczqbzLgtRMkkTWZWkCYzYY2YGHj+eTh71rbjWrKb3bTWpTVjL3/SzI6l04SGWm7OMc2tk/W3gSmoy88XvSPOWZSUpJovIUoqW76/JSCyggREIi9ffw39+uWdL6vPGcxgvjC/PkAjmvInyoGVt3Fx+pmiHTG3jszXI4QoCmQeIiEK0fPP2x4MBXMOhUEXDD1CDE04kO9gyJq1xCD7kHBHzK0j8/UIIYob6VQtRAEEB0Niom3HjGYWsxijS/PmKtfxzlcZmjaFDz7Q/h0RkXf+rEPCHTG3jszXI4QobiQgEiKfbA2GPLjFLcrp0l7ldd7k1XyXwWCA337TaoeMRq0ZKq8h4e3b69OPHrXuWrbMrSPz9QghihtpMhMiH154wbZgqAvrswVDYZwoUDAE8Nhjd5rKTEPCwfolOIxG65YQsRRI5aZ9e+2YnEbaGQxaB29bzimEEI4kAZEQNjAaYf16mDPH2iMU27iX9XQ1p6ziQQwoThJW4PI8+KD+ta1DwvPq62PyzDO2dX7OT3AmhBDOJAGREHkwGmHVKm1FeDc36No172MA6nIEhQv3ssOc1o6tPMwqu5XtwoXsaVFRcOKENpps6VLtZ3y85SHu1vbhqVPH9rLJfD1CiOJE+hAJkYuYGOjd2/bj3mMc4/jA/Poi/lTmHOmUsWPpICDAcrqrq35ofU4c3ddH1kcTQhQXEhAJkYP8BEN+XOEKFXVpT/F/LOIpO5bsjr/+0mbGzm+QYerrY2tHbFtYG5wJIYQzSZOZEBZcvmx7MDSI/2YLhipyyWHBEMCbbxZsOQzp6yOEEBoJiITIonZt8Pe3Pr8r6ZwjmP8yxJw2mxcwoLIFSI5y5oy2TEZ+giLp6yOEELJ0h1Vk6Y7S4eZNreP0jRvWH3MPO9lJW11aQw5ymIZ2Ll3eCrochqzNJYQoaWTpDiFs1KsXeHraFgx9S29dMLSLuzGQ4ZRgCAq+HIapr0///tpPCYaEEKWJdKoWpd6DD8Lq1dbnD+EUp6imS+vOGtbS3c4lyx9ZDkMIIWwnNUSiVDIaYcMGqFLFtmBoEtOzBUPluFFkgiGQ5TCEECI/JCASpU5MjNZpOiLC+toUT66jMDCdl8xp43kXAyrbkhyO5O8vy2EIIYQjSEAkShXT3ELJydYf8yCrsq1EX5XTvM94O5fuDhcL/zP9/eGp/43glyHyQghhXxIQiVIjLc3WuYUUe2jOKh42p3xFPwwozlI1l+Pyr3dvmDbN8iSJly/De+/B+PEyRF4IIexNOlWLUsHWWacbcYADNNGl3c0ufuNuO5fsDn9/be2xWrUsB0RKaTVBy5bBsWOwY4cMkRdCCHuRgEiUeF99BQMGWJ+/B6tZzZ1l5P+lGjU5TgaOjTjmz9eCnNxWnzcNrd+xQ5bDEEIIe5ImM1GitWtnfTBUjhvMZbguGBrAEqrzr8ODoZ49teYuazt5y9B6IYSwL6khEiXSzZvaRIvWasluljCQ+hwB4APG8BJvk0pZB5VQb9cubSoAR68+L4QQ
wjKpIRIlSloaNGxofTDkgpFJTOcX7qE+RzhDFSKIZRwfFFowBHDhgjbDtGn1eWuG1huN2kr3X32l/TQaC624QghR4khAJEqMF14ADw84fNi6/GGcYBMdmc5LlCGd5fShCfvZQIRjC5qDc+f0q89nlXlo/cqV2gr3nTppTYIFWfFeCCGEBESihPD1hTlzrM2tGMiX7KMZ7dnGVbwZwmIe45tCW53eksBAraZn61YoXz77/ooVtaH1oK1sn7XzdUFWvBdCiNJO+hCJYi8oCFJSrMvrxxXmMZx+fA3AdtoyiC+Ip6YDS5g3d3cYMkQLanJy6RJkZMCYMbkPy4+OhocflmH4QghhC6khEsXWtWtaf5vz563L35E4/qQp/fiadFx5hTfowGanB0Og9X3KLRgCLdgZMcK6Yfn5XfFeCCFKK6khEsVS8+awb591ed1J5U1eYRzv44Lib+rwOF86dJJFR1BK63xtDRmWL4QQtpGASBQ7rq5a05E1GnKQJQykOVr09BnDGMf72dYmK2lkWL4QQthGmsxEsXHtmtZsZE0wZCCD5/mI3bSiOfu4QCUeYiXP8VmxD4YqVZIV74UQwt4kIBLFQqNGlkdeWRLMOX6kOx8xmrKk8iPdaMJ+fuAhxxbSwUzBzty5d15n3Q+y4r0QQuSHUwOiLVu28OCDD1KlShUMBgPff/+9br9Sitdee43KlStTrlw5IiIiOHr0qC7P5cuXGThwID4+Pvj5+TF06FCuXbumy/Pnn3/Svn17ypYtS2hoKDNnznT0rQk7uXlT+6I/dMi6/L34jv00oSvruUlZRvAJPVhDIsGOLWgeQkKyr1Bvi8zBzqOPasPvZcV7IYSwH6cGRNevX6dZs2Z88sknFvfPnDmTjz76iE8//ZRdu3bh5eVFZGQkt27dMucZOHAgBw8eJDY2ltWrV7NlyxaGDRtm3p+SkkKXLl0ICwtj9+7dvPvuu0ydOpX58+c7/P5EwfTqZf2M015cYwFP8x1RVOISf9CClvzBPEYAObQvFaIPP4SPPtICm5yau3KTNdiJioITJyAuDpYu1X7Gx0swJIQQ+aaKCEB999135tcZGRkqODhYvfvuu+a0pKQk5eHhob766iullFKHDh1SgPrtt9/MedauXasMBoM6c+aMUkqpuXPnqgoVKqjU1FRznhdffFHVq1fP6rIlJycrQCUnJ+f39oSNatVSShtXlffWhp3qKNoBRgxqOi+qMqRafXxhbHFx2n2tWKFUSIj1x/XsqR2bnu7Md0MIIYonW76/i2wfovj4eBISEoiIuLOMgq+vL23atGHnzp0A7Ny5Ez8/P1q3bm3OExERgYuLC7t27TLnue+++3B3dzfniYyM5MiRI1y5csXitVNTU0lJSdFtovAYDHDsWN75XEnnNaaxjXbU5hj/Uo1OxDGZGdzGPe8TFCLTMPioKFi82Prjdu7UOkhLnyAhhHCsIhsQJSQkABAUFKRLDwoKMu9LSEggMDBQt9/NzY2KFSvq8lg6R+ZrZDV9+nR8fX3NW2hoaMFvSOQpLc365qSaHGMr7ZnGVNwwsoQBNGMfW+jg2ELmU+Zh8NZOJAna7NSbNtm9OEIIIbIosgGRM02ePJnk5GTzdurUKWcXqcTr3VtbmDVviif5D/toRji/kIQvA1jC4ywhGT8HlzJ/XF2hTZs7K9MnJtp2vAREQgjheEV2YsbgYG1UUGJiIpUz/XmdmJhI8+bNzXnOZ/lzOz09ncuXL5uPDw4OJjHLN5DptSlPVh4eHnhY9+0s7MDaWqGKXGI+w+iNtnrpJjowhM85SZgDS1dwRiOEhelnmXZ11dKFEEIUDUW2hqhGjRoEBwezYcMGc1pKSgq7du0iPDwcgPDwcJKSkti9e7c5z8aNG8nIyKBNmzbmPFu2bOH27dvmPLGxsdSrV48KFSoU0t0ISxISrA+GHuAn9tOE3sSQRhkm8g6d2VDkgyGTrEtu2BIMdexo16IIIYSwwKkB0bVr19i7
dy979+4FtI7Ue/fu5eTJkxgMBqKjo3nzzTdZtWoV+/fvZ/DgwVSpUoVevXoB0KBBA7p27cozzzzDr7/+yvbt2xk1ahT9+vWjSpUqAAwYMAB3d3eGDh3KwYMH+frrr5k9ezZjx4510l0LgHLlrFteoiw3mcVofiKSKpzjMPW5h194l4lkUPJ7Gvv7S0AkhBCFohBGveUoLi5OAdm2IUOGKKW0ofevvvqqCgoKUh4eHqpz587qyJEjunNcunRJ9e/fX3l7eysfHx/15JNPqqtXr+ry7Nu3T7Vr1055eHioqlWrqhkzZthUThl2bz/p6dYPOW/KXrWfRuaEOYxU5bju9CH0hbmtWOHsd0wIIYovW76/DUop5cR4rFhISUnB19eX5ORkfHx8nF2cYmv5cnjssbzzGchgDB/yNi/hQRoJBPEU/2Et3R1fSCeIjtaezZkzd9JCQmD2bJloUQghCsKW7+8i26lalBxGIzz8MKxZk3feEE7xOUO4nzgAVvIQT7OQiwQ4uJTO8/DD8N57sHWrNl9R5coy95AQQhQ2CYiEQy1ZAo8/bl3eR/mGz3iWCiRxHU+imcVCnqYoLL3hCAaDVhNkCn6kr5AQQjiPBETCYWrVguPH887nQzJzeJ7BfAHAr9zF43zJUeo6uITOIyvTCyFE0VJkh92L4q1iReuCoXvZxj6aMZgvMOLC67zKvWwv0cEQyMr0QghR1EgNkbCrtDTrZpx24zZTmcokZuBKBsepwSC+YAf3Or6QTvLKK9CwofQREkKIokgCImE3Y8ZoTUB5qcsRvuRx7uJ3ABbxBKOZzVVK9gi+zp2ln5AQQhRVEhCJArt5Ezw9rcmpGMZ8PmAsXtzgMhUYxnxW0MfRRSyQV1+F+++HlSutC/iyytx5WgghRNEkAZEokAcegJ9/zjtfAOf5P4byIKsB+JnODOFzzlLVwSUsGH9/mDLlziiwtm2hb19t2kRrSOdpIYQoHqRTtcg3g8G6YKg7a9hPEx5kNam4M4YP6MJPTg2GXFzghRdg2jTtPnJaU+3TT/WBzKOPagGStaTztBBCFA9SQyRsZjSCmxWfnHLc4D3GM4J5AOynMQNYygGaOLiEefv6a+jzv5a6xo1h9Gg4fTp7vjFjtOApc0DzyiswZw5cupTz+f39tWt07Cg1Q0IIURxIDZGwybJl1gVDLdnNH7Q0B0MfMIa7+K1IBEMVK8Ijj9x5HRUFH3xgOe/p09C7N8TE3ElzdYX58y3XKplqm+bP1zpRSzAkhBDFgwREwipGIwQGQv/+uedzwcgkpvML91CfI5yhChHEMo4PSKVs4RQ2D5cvw1tv3XltNMLw4bkfM2yYli+zihWz5ytfXqttqlgxe34hhBBFlwREIk8LFmi1Qhcu5J4vjBPE0YnpvEQZ0vmW3jTlTzYQUTgFtcGUKXdqfTZtyr35C7T9mzZp/46J0ZrbLB2TkqJ1oO7UCapX19csCSGEKLokIBK5Mhi02pHcKQbyJftoxn1s5SrePMEiHmU5l/EvjGLmS3S0VotjCnTysmmTln/0aOtGmZ05owVOEhQJIUTRJwGRyFFOI68y8+MKSxnAlwzClxS205Zm7ONznqCoL8p66pS2wrwttm613PnaElPQZAq8hBBCFF0SEIlsEhKsC4Y6Esc+mtGfZaTjyqu8Tgc2E09NxxfSTs6ds3726I4dtfy2UCp/gZcQQojCJQGR0HF11dbayo07qbzDRDbQmWqc4ii1acsO3uRVjMVsJofKlbVAxz+Plj1/fy1fXs8mJ7YGUkIIIQqXBETCzGCAjIzc8zTgEL9wDxN5FxcU83mGFuzhN+4unELaicEAoaF3FlmdPz/3/PPna/nat9cmW7SmBi2z/AZSQgghCocERIKbN635gleMYg67aUUL9nKBSjzM9zzLfK7jXRjFtBtLy2lERcGKFVA1y+TZISFaumliRldXmD1bf568rmUKvIQQQhRdEhCVcqGheS/MGsw51tKNObxAOW6x
lq40YT+reLhwClkATz6pBTWZ5bScRlQU/PsvxMXB0qXazxMnLOf79tvswVNWso6ZEEIUHwalrF2msvRKSUnB19eX5ORkfHx8nF0cu7GmhuNhvmchT1OJS9ykLBN4l08YSVEfQQZav5/ERO3fmzbdGV7fsWP2JTWMRq3j87lzWvOWqSktN5mPOXpUm68p8wi00FAtGJJ1zIQQwjls+f4uXj1ghV3cvJl3rZAX1/iQMTzDQgD20JyBLOEwDQuhhPbxwgtaUBMTo1+r7M03tVqi2bO1YCXrftDvz4mrq36E2ssv2x5UCSGEKBqkhsgKJamGqE4d+Oef3PPczS6WMJDaHCMDA+8ygVd5g9u4F04h7cBUO7RypTY5YtZPual2bPx4ePfdnM+Tuf+QEEKI4sWW728JiKxQUgKivJrIXEnnJd7mNV7HDSMnCWUw/2UzHQulfPlhMFieNXrFCnj4YW35jJwmUjQ9j9z+B5gCK0s1PflpZhNCCFF4bPn+lk7VpcCZM3kHQzU5xlba8zpTcMPIUvrTlD+LZDDk7a0FPJZGhYWG3qnVyWtWaaXyXoIj8xpmmcXEaMFWp04wYICsXSaEEMWd9CEq4Vxc8vrSVzzBYj7iBcpzjWR8GM48vmJAYRXRZmXLarU/rq7az5xqaew1GeKmTdC5853XpsVdsz5X09pllkawCSGEKNokICrB8qoVqsgl5jOM3mjVGpu5j8H8l5OEFULp8u/iRS0IMo0Uy2npDUdMhpjb4q5Kac88OvpOwCaEEKJ4kCazEig5Oe9gKIJY9tOE3sSQRhleZAb3s7HIB0Mm1tT+5HdWaUvnMbGmGU7WLhNCiOJHAqISpnx58PPLeb8Ht/iAMcTShSqc4zD1uYdfmMmLZODcKo3wcOvzWlP7Y+us0rmdx8TaZjhZu0wIIYoXCYhKEIMBrl3LeX8T/uR3WjOGWQB8wghasZs9tCycAuYhMlLr85SXkBDrl8LIaVbpgADry3X+/J1/W9sMJ2uXCSFE8SIBUQmQkJB7DYiBDMbwAb9xF405SCKBdGcNo/iEm+QxQ2Mhmjo178VlQav1saV/TlSUtgRH5iU5Tp8Ga2dQyBzc5NUMJ2uXCSFE8SQBUTFnMOReG1GV08TyAB8wDg/SWMWDNGE/a+leeIW0k4oV7TdRoqsrLFyYd76swU1uzXCydpkQQhRfEhAVY3n1i+nDcv6kKZ3ZyHU8GcZnPMxKLhBYOAW0s2++yV8wlNOcQa6uMGFCzscZDJaDm5ya4XJaNFYIIUTRJwFRMZRXE1l5UljMEJbzGBW5wq/cRQv2sIBhFLVFWW3p7Jy5L4+1THMGZR0ZZpoz6J57YPny7H2KQkNzD24sNcPFx0swJIQQxZUs3WGForR0R5kykJ6e8/572cYXDKIGJzDiwtu8xOu8RjplCq+QNggNhaefhilT8s4bF5fznEOWGI15L90REqIFMiDLcAghREkjq92XQEYjuOXybpXjBlOZyjjex5UM4qnO43zJDu4tvELaoFcvbYJDU/+cBQu0WhtL4bkpcLG1o7ItcwZ17GhbsCWEEKJkkSazYuCLL3IPhnqwmht4MZF3cSWDxQyhGfuKbDAEMGLEnZmmHdVRWeYMEkIIYS0JiIo4Pz8YPDinvYrfaM1qHjSnDOMznmQxV3Fu015ess435IiOyjJnkBBCCGtJk1kRlluH44Yc5CCNdWlt+IVfaePgUtmHpQ7SUVG5L9ZqK9OcQfZuihNCCFHySEBUBN28CZ65zJc4h1GM4hPz61OEUIN4jMXo7cypVia3xVptZWqK69NHC34yB0UyZ5AQQojMpMmsiHnwwZyDoYpcQmHQBUOP8wXVOFWsgqHCnMlZ5gwSQghhjeLzLVoK1Kx5Zwh4VkNZyEKe0aX5cYVk/BxfMDsr7FoZezfFCSGEKHkkICoCdu+G1q0t73PjNucJpAJJ5rSZTOBFZhZO4ezI3x/mz3dOrYw9m+KEEEKUPBIQOVluHafbsZWt
3KdLq8sRjlLXwaWyr7JlYfJkePllqZURQghRNElA5ES5BUOr6UEPfjS/3kQHOhFHUVt6IzcGAzz2GCxZIoGQEEKIok0CIifJKRgK4wQnqKFLe4Cf+JkHCqFUtitXThsVZ+LjA23bQmSkNvmiu7t9rmM0Oq4PkCPPXVzIMygceT1na9+H0v5+lfb7Fw6iSpGPP/5YhYWFKQ8PD3X33XerXbt2WXVccnKyAlRycrJdyqENAM++vcZUXUIqZZQ7t3LMX1S2gACloqOViotTKj3dLo9IZ8UKpUJC9NcMCdHSi/K5iwt5BoUjr+ds7ftQ2t+v0n7/wja2fH+XmoBo2bJlyt3dXf3nP/9RBw8eVM8884zy8/NTiYmJeR5rz4Dor7+yBxTepGRLHMVHTg90rN0MBm1zxC+kFSu0czvimo48d3Ehz6Bw5PWcJ0yw7n0o7e9Xab9/YTtbvr9LzWr3bdq04a677uLjjz8GICMjg9DQUJ5//nkmTZqU67H2XO3e1RUyMu68jmIFK+ijyxNEAucJKtB1ClvmlePt2ZRl7Wr1tl7TkecuLuQZFI68njNoz9dotLzP9D788w/UqlV63y/5vIr8sOX7u1RMzJiWlsbu3buJiIgwp7m4uBAREcHOnTuz5U9NTSUlJUW32UvmYMiN27pgaDFDMKCKXTAE2t9pppXj7cWW1eqL0rmLC3kGhSOv5ww5B0Nw532YO7d0v1/yeRWOVioCoosXL2I0GgkK0gcaQUFBJCQkZMs/ffp0fH19zVtoaKhDypWOG1/RD4AW/MGTLHbIdQqTPVeOd+Rq9Y48d3Ehz6Bw2Ov5HTtWuNcrauTzKhytVAREtpo8eTLJycnm7dSpU3Y7dw3dADIDA/gKA4q9tLDbNZzJnivHO3K1ekeeu7iQZ1A47PX8atUq3OsVNfJ5FY5WKgKiSpUq4erqSmJioi49MTGR4ODgbPk9PDzw8fHRbfby++92O1WRYjDYf40y02r1OU1RUJBrOvLcxYU8g8KR13MGrc9LXu/DiBGl+/2Sz6twtFIRELm7u9OqVSs2bNhgTsvIyGDDhg2Eh4cXalkqVoSg4tdFCIBx47RfOll/ITlq5XjTavWZr2Gvazry3MWFPIPCkddzNhhg7Nic94P2Pri7l+73Sz6vwuEcPuatiFi2bJny8PBQixcvVocOHVLDhg1Tfn5+KiEhIc9j7T0PkVJKBQXlPpQ9NdXyfBuZNze3nIfB23NYfWho7nOlZN7vCI68pjPup6iRZ1A48nrO1r4Ppf39Ku33L2wjw+5z8PHHH/Puu++SkJBA8+bN+eijj2jTpk2ex9lz2H1mly/DXXfB8ePaaxcXOHQI6tW7kyfzjKyBgVra+fN3Zmc1GmHOHNi2Dby9YdAgbRHTHTvuHJOWBkuXwrVrcO+9UL8+fPUV/PuvNox1yBDo0EF/TNbr5Gc2XXuSmaodS55B4ZCZqu2jtN+/sJ4t39+lKiDKL0cFREIIIYRwHJmHSAghhBDCBhIQCSGEEKLUk4BICCGEEKWeBERCCCGEKPUkIBJCCCFEqScBkRBCCCFKPQmIhBBCCFHqSUAkhBBCiFJPAiIhhBBClHpuzi5AcWCazDslJcXJJRFCCCGEtUzf29YsyiEBkRWuXr0KQGhoqJNLIoQQQghbXb16FV9f31zzyFpmVsjIyODs2bOUL18eg8Fgt/OmpKQQGhrKqVOnZI00K8jzso08L9vI87KNPC/byPOyjb2el1KKq1evUqVKFVxccu8lJDVEVnBxcSEkJMRh5/fx8ZH/IDaQ52UbeV62kedlG3letpHnZRt7PK+8aoZMpFO1EEIIIUo9CYiEEEIIUepJQOREHh4eTJkyBQ8PD2cXpViQ52UbeV62kedlG3letpHnZRtnPC/pVC2EEEKIUk9qiIQQQghR6klAJIQQQohSTwIiIYQQQpR6EhAJIYQQotSTgMhJPvnkE6pXr07ZsmVp06YNv/76q7OLVCi2bNnCgw8+SJUq
VTAYDHz//fe6/UopXnvtNSpXrky5cuWIiIjg6NGjujyXL19m4MCB+Pj44Ofnx9ChQ7l27Zouz59//kn79u0pW7YsoaGhzJw509G3ZnfTp0/nrrvuonz58gQGBtKrVy+OHDmiy3Pr1i1GjhyJv78/3t7e9O7dm8TERF2ekydP0qNHDzw9PQkMDGTChAmkp6fr8mzatImWLVvi4eFB7dq1Wbx4saNvz+7mzZtH06ZNzRO5hYeHs3btWvN+eVa5mzFjBgaDgejoaHOaPLM7pk6disFg0G3169c375dnld2ZM2d4/PHH8ff3p1y5cjRp0oTff//dvL/I/b5XotAtW7ZMubu7q//85z/q4MGD6plnnlF+fn4qMTHR2UVzuB9//FG9/PLLKiYmRgHqu+++0+2fMWOG8vX1Vd9//73at2+feuihh1SNGjXUzZs3zXm6du2qmjVrpn755Re1detWVbt2bdW/f3/z/uTkZBUUFKQGDhyoDhw4oL766itVrlw59dlnnxXWbdpFZGSkWrRokTpw4IDau3ev6t69u6pWrZq6du2aOc9zzz2nQkND1YYNG9Tvv/+u7rnnHtW2bVvz/vT0dNW4cWMVERGh9uzZo3788UdVqVIlNXnyZHOe48ePK09PTzV27Fh16NAhNWfOHOXq6qrWrVtXqPdbUKtWrVJr1qxRf//9tzpy5Ih66aWXVJkyZdSBAweUUvKscvPrr7+q6tWrq6ZNm6rRo0eb0+WZ3TFlyhTVqFEjde7cOfN24cIF8355VnqXL19WYWFh6oknnlC7du1Sx48fV+vXr1f//POPOU9R+30vAZET3H333WrkyJHm10ajUVWpUkVNnz7diaUqfFkDooyMDBUcHKzeffddc1pSUpLy8PBQX331lVJKqUOHDilA/fbbb+Y8a9euVQaDQZ05c0YppdTcuXNVhQoVVGpqqjnPiy++qOrVq+fgO3Ks8+fPK0Bt3rxZKaU9mzJlyqjly5eb8xw+fFgBaufOnUopLQB1cXFRCQkJ5jzz5s1TPj4+5uczceJE1ahRI921+vbtqyIjIx19Sw5XoUIFtXDhQnlWubh69aqqU6eOio2NVR06dDAHRPLM9KZMmaKaNWtmcZ88q+xefPFF1a5duxz3F8Xf99JkVsjS0tLYvXs3ERER5jQXFxciIiLYuXOnE0vmfPHx8SQkJOieja+vL23atDE/m507d+Ln50fr1q3NeSIiInBxcWHXrl3mPPfddx/u7u7mPJGRkRw5coQrV64U0t3YX3JyMgAVK1YEYPfu3dy+fVv3vOrXr0+1atV0z6tJkyYEBQWZ80RGRpKSksLBgwfNeTKfw5SnOH8ejUYjy5Yt4/r164SHh8uzysXIkSPp0aNHtvuSZ5bd0aNHqVKlCjVr1mTgwIGcPHkSkGdlyapVq2jdujWPPvoogYGBtGjRggULFpj3F8Xf9xIQFbKLFy9iNBp1/ykAgoKCSEhIcFKpigbT/ef2bBISEggMDNTtd3Nzo2LFiro8ls6R+RrFTUZGBtHR0dx77700btwY0O7F3d0dPz8/Xd6szyuvZ5FTnpSUFG7evOmI23GY/fv34+3tjYeHB8899xzfffcdDRs2lGeVg2XLlvHHH38wffr0bPvkmem1adOGxYsXs27dOubNm0d8fDzt27fn6tWr8qwsOH78OPPmzaNOnTqsX7+e4cOH88ILL/D5558DRfP3vax2L0QxMHLkSA4cOMC2bducXZQirV69euzdu5fk5GS+/fZbhgwZwubNm51drCLp1KlTjB49mtjYWMqWLevs4hR53bp1M/+7adOmtGnThrCwML755hvKlSvnxJIVTRkZGbRu3Zq3334bgBYtWnDgwAE+/fRThgwZ4uTSWSY1RIWsUqVKuLq6Zht9kJiYSHBwsJNKVTSY7j+3ZxMcHMz58+d1+9PT07l8+bIuj6VzZL5GcTJq1ChWr15NXFwcISEh5vTg4GDS0tJISkrS5c/6vPJ6Fjnl8fHx
KXa/6N3d3alduzatWrVi+vTpNGvWjNmzZ8uzsmD37t2cP3+eli1b4ubmhpubG5s3b+ajjz7Czc2NoKAgeWa58PPzo27duvzzzz/y+bKgcuXKNGzYUJfWoEEDczNjUfx9LwFRIXN3d6dVq1Zs2LDBnJaRkcGGDRsIDw93Ysmcr0aNGgQHB+ueTUpKCrt27TI/m/DwcJKSkti9e7c5z8aNG8nIyKBNmzbmPFu2bOH27dvmPLGxsdSrV48KFSoU0t0UnFKKUaNG8d1337Fx40Zq1Kih29+qVSvKlCmje15Hjhzh5MmTuue1f/9+3S+V2NhYfHx8zL+swsPDdecw5SkJn8eMjAxSU1PlWVnQuXNn9u/fz969e81b69atGThwoPnf8sxydu3aNY4dO0blypXl82XBvffem22akL///puwsDCgiP6+t7kbtiiwZcuWKQ8PD7V48WJ16NAhNWzYMOXn56cbfVBSXb16Ve3Zs0ft2bNHAeqDDz5Qe/bsUf/++69SShuG6efnp1auXKn+/PNP9fDDD1schtmiRQu1a9cutW3bNlWnTh3dMMykpCQVFBSkBg0apA4cOKCWLVumPD09i92w++HDhytfX1+1adMm3VDfGzdumPM899xzqlq1amrjxo3q999/V+Hh4So8PNy83zTUt0uXLmrv3r1q3bp1KiAgwOJQ3wkTJqjDhw+rTz75pFgO9Z00aZLavHmzio+PV3/++aeaNGmSMhgM6qefflJKybOyRuZRZkrJM8ts3LhxatOmTSo+Pl5t375dRUREqEqVKqnz588rpeRZZfXrr78qNzc39dZbb6mjR4+qJUuWKE9PT/Xll1+a8xS13/cSEDnJnDlzVLVq1ZS7u7u6++671S+//OLsIhWKuLg4BWTbhgwZopTShmK++uqrKigoSHl4eKjOnTurI0eO6M5x6dIl1b9/f+Xt7a18fHzUk08+qa5evarLs2/fPtWuXTvl4eGhqlatqmbMmFFYt2g3lp4ToBYtWmTOc/PmTTVixAhVoUIF5enpqR555BF17tw53XlOnDihunXrpsqVK6cqVaqkxo0bp27fvq3LExcXp5o3b67c3d1VzZo1ddcoLp566ikVFham3N3dVUBAgOrcubM5GFJKnpU1sgZE8szu6Nu3r6pcubJyd3dXVatWVX379tXNqSPPKrsffvhBNW7cWHl4eKj69eur+fPn6/YXtd/3BqWUsq1OSQghhBCiZJE+REIIIYQo9SQgEkIIIUSpJwGREEIIIUo9CYiEEEIIUepJQCSEEEKIUk8CIiGEEEKUehIQCSGEEKLUk4BICCGEEKWeBERCCKfo2LEj0dHRDjt/9erVmTVrlsPObw1H36MQwn7cnF0AIYRwhN9++w0vLy+nliEmJoYyZcqYX1evXp3o6GgJkoQogiQgEkKUSAEBAU67dlpaGu7u7lSsWNFpZRBC2EaazIQQRUJqairjx4+natWqeHl50aZNGzZt2mTe/++///Lggw9SoUIFvLy8aNSoET/++GOO58vaZGYwGFi4cCGPPPIInp6e1KlTh1WrVpn3X7lyhYEDBxIQEEC5cuWoU6cOixYtMu8/ffo0/fv3p2LFinh5edG6dWt27doFwNSpU2nevDkLFy6kRo0alC1bFtA3mXXs2JF///2XMWPGYDAYMBgM+bovIYRjSA2REKJIGDVqFIcOHWLZsmVUqVKF7777jq5du7J//37q1KnDyJEjSUtLY8uWLXh5eXHo0CG8vb1tusa0adOYOXMm7777LnPmzGHgwIH8+++/VKxYkVdffZVDhw6xdu1aKlWqxD///MPNmzcBuHbtGh06dKBq1aqsWrWK4OBg/vjjDzIyMszn/ueff1ixYgUxMTG4urpmu3ZMTAzNmjVj2LBhPPPMM+Z0e9yXEKLgJCASQjjdyZMnWbRoESdPnqRKlSoAjB8/nnXr1rFo0SLefvttTp48Se/evWnSpAkANWvWtPk6TzzxBP379wfg7bff
5qOPPuLXX3+la9eunDx5khYtWtC6dWtAq2EyWbp0KRcuXOC3334zN4PVrl1bd+60tDT++9//5thUV7FiRVxdXSlfvjzBwcG6ey/ofQkhCk4CIiGE0+3fvx+j0UjdunV16ampqfj7+wPwwgsvMHz4cH766SciIiLo3bs3TZs2tek6mfN7eXnh4+PD+fPnARg+fDi9e/fmjz/+oEuXLvTq1Yu2bdsCsHfvXlq0aJFrn6CwsLB89Vuyx30JIQpO+hAJIZzu2rVruLq6snv3bvbu3WveDh8+zOzZswF4+umnOX78OIMGDWL//v20bt2aOXPm2HSdzCO+QOtXZGr26tatm7mPz9mzZ+ncuTPjx48HoFy5cnmeO78j2uxxX0KIgpOASAjhdC1atMBoNHL+/Hlq166t2zI3L4WGhvLcc88RExPDuHHjWLBggV3LERAQwJAhQ/jyyy+ZNWsW8+fPB7Sapb1793L58uUCnd/d3R2j0Zgt3dH3JYTImwREQginq1u3LgMHDmTw4MHExMQQHx/Pr7/+yvTp01mzZg0A0dHRrF+/nvj4eP744w/i4uJo0KCB3crw2muvsXLlSv755x8OHjzI6tWrzefv378/wcHB9OrVi+3bt3P8+HFWrFjBzp07bbpG9erV2bJlC2fOnOHixYuFcl9CCOtIQCSEKBIWLVrE4MGDGTduHPXq1aNXr1789ttvVKtWDQCj0cjIkSNp0KABXbt2pW7dusydO9du13d3d2fy5Mk0bdqU++67D1dXV5YtW2be99NPPxEYGEj37t1p0qQJM2bMsDiaLDevv/46J06coFatWub+Ro6+LyGEdQxKKeXsQgghhBBCOJPUEAkhhBCi1JOASAghhBClngREQgghhCj1JCASQgghRKknAZEQQgghSj0JiIQQQghR6klAJIQQQohSTwIiIYQQQpR6EhAJIYQQotSTgEgIIYQQpZ4EREIIIYQo9f4fhG2aSffhw8cAAAAASUVORK5CYII=",
1069
+ "text/plain": [
1070
+ "<Figure size 640x480 with 1 Axes>"
1071
+ ]
1072
+ },
1073
+ "metadata": {},
1074
+ "output_type": "display_data"
1075
+ }
1076
+ ],
1077
+ "source": [
1078
+ "#interpretation\n",
1079
+ "fig,ax = plt.subplots()\n",
1080
+ "ax.scatter(x_test,y_test,color='blue')\n",
1081
+ "ax.plot(x_test,predict,color='red')\n",
1082
+ "plt.title(\"Prédiction de la Présence (Attendency) (R2 = 0.98)\")\n",
1083
+ "plt.xlabel(\"les inscrits\")\n",
1084
+ "plt.ylabel(\"les presents\")\n",
1085
+ "plt.show()\n"
1086
+ ]
1087
+ },
1088
+ {
1089
+ "cell_type": "code",
1090
+ "execution_count": 22,
1091
+ "id": "b962144b-ab7e-42e3-ba8e-2b31953d64ca",
1092
+ "metadata": {},
1093
+ "outputs": [
1094
+ {
1095
+ "name": "stdout",
1096
+ "output_type": "stream",
1097
+ "text": [
1098
+ "avec MAE : 28.234713006519993\n",
1099
+ "avec R2 : 0.9824798790918774\n"
1100
+ ]
1101
+ }
1102
+ ],
1103
+ "source": [
1104
+ "#mesure performance\n",
1105
+ "print ('avec MAE :', mean_absolute_error(y_test,predict))\n",
1106
+ "print ('avec R2 :', r2_score(y_test,predict))\n",
1107
+ "#score r2 moyen, améliorons ça avec les dates"
1108
+ ]
1109
+ },
1110
+ {
1111
+ "cell_type": "code",
1112
+ "execution_count": null,
1113
+ "id": "4f3a0ef8-9b7b-4cc1-a332-18bf1b8d136c",
1114
+ "metadata": {},
1115
+ "outputs": [],
1116
+ "source": []
1117
+ }
1118
+ ],
1119
+ "metadata": {
1120
+ "kernelspec": {
1121
+ "display_name": "Python 3.10 (ml-env)",
1122
+ "language": "python",
1123
+ "name": "ml-env"
1124
+ },
1125
+ "language_info": {
1126
+ "codemirror_mode": {
1127
+ "name": "ipython",
1128
+ "version": 3
1129
+ },
1130
+ "file_extension": ".py",
1131
+ "mimetype": "text/x-python",
1132
+ "name": "python",
1133
+ "nbconvert_exporter": "python",
1134
+ "pygments_lexer": "ipython3",
1135
+ "version": "3.11.9"
1136
+ }
1137
+ },
1138
+ "nbformat": 4,
1139
+ "nbformat_minor": 5
1140
+ }