sahlnizar commited on Feb 8

Commit

5da71f2

verified ·

1 Parent(s): 62db04f

Add files using upload-large-folder tool

Browse files

Files changed (49) hide show

.gitattributes +35 -35
course_feedback_nlp/Coursera_courses.csv +624 -0
course_feedback_nlp/Untitled.ipynb +418 -0
course_feedback_nlp/evaluate.py +548 -0
course_feedback_nlp/requirements.txt +7 -0
course_feedback_nlp/test.py +52 -0
course_feedback_nlp/train.py +862 -0
course_feedback_nlp/train_3_classes.py +872 -0
dropout_binaryclass/correlation.py +218 -0
dropout_binaryclass/data.csv +0 -0
dropout_binaryclass/feature_importance.png +0 -0
dropout_binaryclass/feature_selection_recommendations.txt +42 -0
dropout_binaryclass/model_config.json +411 -0
dropout_binaryclass/predict_students_dropout_and_academic_success_model.pkl +0 -0
dropout_binaryclass/redundant_feature_pairs.csv +16 -0
dropout_binaryclass/target_correlations.csv +37 -0
dropout_binaryclass/train.ipynb +0 -0
dropout_binaryclass/train.py +224 -0
grade_multiclass/02_grade_distribution.png +0 -0
grade_multiclass/03_performance_index_distribution.png +0 -0
grade_multiclass/04_features_by_grade.png +0 -0
grade_multiclass/05_extracurricular_analysis.png +0 -0
grade_multiclass/06_correlation_heatmap.png +0 -0
grade_multiclass/09_feature_importance.png +0 -0
grade_multiclass/10_learning_curves.png +0 -0
grade_multiclass/11_model_comparison.png +0 -0
grade_multiclass/Student_Performance.csv +0 -0
grade_multiclass/correlation_heatmap.png +0 -0
grade_multiclass/feature_importance.png +0 -0
grade_multiclass/features_by_grade.png +0 -0
grade_multiclass/learning_curves.png +0 -0
grade_multiclass/model_comparison.png +0 -0
grade_multiclass/student_performance_classification.ipynb +0 -0
grade_multiclass/student_performance_classification.py +1100 -0
grade_multiclass/target_distribution.png +0 -0
lr_attendance/2018-2019_Daily_Attendance_20240429.csv +0 -0
lr_attendance/add_weather_features.py +195 -0
lr_attendance/best_model_coefficients.csv +13 -0
lr_attendance/explore_data.py +28 -0
lr_attendance/feature_engineering.py +154 -0
lr_attendance/feature_info.json +118 -0
lr_attendance/final_coefficients.csv +13 -0
lr_attendance/final_predictions.csv +0 -0
lr_attendance/improved_predictions.csv +0 -0
lr_attendance/model_comparison.csv +5 -0
lr_attendance/model_summary.csv +2 -0
lr_attendance/nyc_weather_2018_2019.csv +297 -0
lr_attendance/prepare_for_modeling.py +215 -0
lr_attendance/train.ipynb +1140 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

course_feedback_nlp/Coursera_courses.csv ADDED Viewed

	@@ -0,0 +1,624 @@

+name,institution,course_url,course_id
+Machine Learning,Stanford University,https://www.coursera.org/learn/machine-learning,machine-learning
+Indigenous Canada,University of Alberta,https://www.coursera.org/learn/indigenous-canada,indigenous-canada
+The Science of Well-Being,Yale University,https://www.coursera.org/learn/the-science-of-well-being,the-science-of-well-being
+Technical Support Fundamentals,Google,https://www.coursera.org/learn/technical-support-fundamentals,technical-support-fundamentals
+Become a CBRS Certified Professional Installer by Google,Google - Spectrum Sharing,https://www.coursera.org/learn/google-cbrs-cpi-training,google-cbrs-cpi-training
+Financial Markets,Yale University,https://www.coursera.org/learn/financial-markets-global,financial-markets-global
+Introduction to Psychology,Yale University,https://www.coursera.org/learn/introduction-psychology,introduction-psychology
+Programming for Everybody (Getting Started with Python),University of Michigan,https://www.coursera.org/learn/python,python
+The Bits and Bytes of Computer Networking,Google,https://www.coursera.org/learn/computer-networking,computer-networking
+AI For Everyone,DeepLearning.AI,https://www.coursera.org/learn/ai-for-everyone,ai-for-everyone
+Crash Course on Python,Google,https://www.coursera.org/learn/python-crash-course,python-crash-course
+Psychological First Aid,Johns Hopkins University,https://www.coursera.org/learn/psychological-first-aid,psychological-first-aid
+Neural Networks and Deep Learning,DeepLearning.AI,https://www.coursera.org/learn/neural-networks-deep-learning,neural-networks-deep-learning
+What is Data Science?,IBM,https://www.coursera.org/learn/what-is-datascience,what-is-datascience
+Successful Negotiation: Essential Strategies and Skills,University of Michigan,https://www.coursera.org/learn/negotiation-skills,negotiation-skills
+Fundamentals of Project Planning and Management,University of Virginia,https://www.coursera.org/learn/uva-darden-project-management,uva-darden-project-management
+Project Launch,"University of California, Irvine",https://www.coursera.org/learn/project-management,project-management
+"Brand Management: Aligning Business, Brand and Behaviour",London Business School,https://www.coursera.org/learn/brand,brand
+Writing in the Sciences,Stanford University,https://www.coursera.org/learn/sciwrite,sciwrite
+Stanford Introduction to Food and Health,Stanford University,https://www.coursera.org/learn/food-and-health,food-and-health
+"HTML, CSS, and Javascript for Web Developers",Johns Hopkins University,https://www.coursera.org/learn/html-css-javascript-for-web-developers,html-css-javascript-for-web-developers
+Excel Skills for Business: Essentials,Macquarie University,https://www.coursera.org/learn/excel-essentials,excel-essentials
+Introduction to Negotiation: A Strategic Playbook for Becoming a Principled and Persuasive Negotiator,Yale University,https://www.coursera.org/learn/negotiation,negotiation
+"Everyday Excel, Part 1",University of Colorado Boulder,https://www.coursera.org/learn/everyday-excel-part-1,everyday-excel-part-1
+Learning How to Learn: Powerful mental tools to help you master tough subjects,University of California San Diego,https://www.coursera.org/learn/learning-how-to-learn,learning-how-to-learn
+Google Cloud Platform Fundamentals: Core Infrastructure,Google Cloud,https://www.coursera.org/learn/gcp-fundamentals,gcp-fundamentals
+Viral Marketing and How to Craft Contagious Content,University of Pennsylvania,https://www.coursera.org/learn/wharton-contagious-viral-marketing,wharton-contagious-viral-marketing
+Python Data Structures,University of Michigan,https://www.coursera.org/learn/python-data,python-data
+Private Equity and Venture Capital,Università Bocconi,https://www.coursera.org/learn/private-equity,private-equity
+First Step Korean,Yonsei University,https://www.coursera.org/learn/learn-korean,learn-korean
+"Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning",DeepLearning.AI,https://www.coursera.org/learn/introduction-tensorflow,introduction-tensorflow
+Operating Systems and You: Becoming a Power User,Google,https://www.coursera.org/learn/os-power-user,os-power-user
+Tools for Data Science,IBM,https://www.coursera.org/learn/open-source-tools-for-data-science,open-source-tools-for-data-science
+"Improving Deep Neural Networks: Hyperparameter tuning, Regularization and Optimization",DeepLearning.AI,https://www.coursera.org/learn/deep-neural-network,deep-neural-network
+Diversity and inclusion in the workplace,ESSEC Business School,https://www.coursera.org/learn/diversity-inclusion-workplace,diversity-inclusion-workplace
+Design and Interpretation of Clinical Trials,Johns Hopkins University,https://www.coursera.org/learn/clinical-trials,clinical-trials
+Visual Elements of User Interface Design,California Institute of the Arts,https://www.coursera.org/learn/visual-elements-user-interface-design,visual-elements-user-interface-design
+Management of Fashion and Luxury Companies,Università Bocconi,https://www.coursera.org/learn/mafash,mafash
+Primeros Auxilios Psicológicos (PAP),Universitat Autònoma de Barcelona,https://www.coursera.org/learn/pap,pap
+Social Psychology,Wesleyan University,https://www.coursera.org/learn/social-psychology,social-psychology
+Initiating and Planning Projects,"University of California, Irvine",https://www.coursera.org/learn/project-planning,project-planning
+Computational Thinking for Problem Solving,University of Pennsylvania,https://www.coursera.org/learn/computational-thinking-problem-solving,computational-thinking-problem-solving
+Agile with Atlassian Jira,Atlassian,https://www.coursera.org/learn/agile-atlassian-jira,agile-atlassian-jira
+Fundamentals of Graphic Design,California Institute of the Arts,https://www.coursera.org/learn/fundamentals-of-graphic-design,fundamentals-of-graphic-design
+Introduction to User Experience Design,Georgia Institute of Technology,https://www.coursera.org/learn/user-experience-design,user-experience-design
+Introduction to Marketing,University of Pennsylvania,https://www.coursera.org/learn/wharton-marketing,wharton-marketing
+Python for Data Science and AI,IBM,https://www.coursera.org/learn/python-for-applied-data-science-ai,python-for-applied-data-science-ai
+Marketing Analytics,University of Virginia,https://www.coursera.org/learn/uva-darden-market-analytics,uva-darden-market-analytics
+Natural Language Processing with Classification and Vector Spaces,DeepLearning.AI,https://www.coursera.org/learn/classification-vector-spaces-in-nlp,classification-vector-spaces-in-nlp
+Fundamentals of Quantitative Modeling,University of Pennsylvania,https://www.coursera.org/learn/wharton-quantitative-modeling,wharton-quantitative-modeling
+How to Manage a Remote Team,GitLab,https://www.coursera.org/learn/remote-team-management,remote-team-management
+Mathematics for Machine Learning: Linear Algebra,Imperial College London,https://www.coursera.org/learn/linear-algebra-machine-learning,linear-algebra-machine-learning
+Introduction to Data Science in Python,University of Michigan,https://www.coursera.org/learn/python-data-analysis,python-data-analysis
+Customer Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-customer-analytics,wharton-customer-analytics
+Introduction to Psychology,University of Toronto,https://www.coursera.org/learn/introduction-psych,introduction-psych
+English for Career Development,University of Pennsylvania,https://www.coursera.org/learn/careerdevelopment,careerdevelopment
+Global Diplomacy – Diplomacy in the Modern World,University of London,https://www.coursera.org/learn/global-diplomacy,global-diplomacy
+Game Theory,Stanford University,https://www.coursera.org/learn/game-theory-1,game-theory-1
+SQL for Data Science,"University of California, Davis",https://www.coursera.org/learn/sql-for-data-science,sql-for-data-science
+Write Professional Emails in English,Georgia Institute of Technology,https://www.coursera.org/learn/professional-emails-english,professional-emails-english
+Medical Neuroscience,Duke University,https://www.coursera.org/learn/medical-neuroscience,medical-neuroscience
+System Administration and IT Infrastructure Services,Google,https://www.coursera.org/learn/system-administration-it-infrastructure-services,system-administration-it-infrastructure-services
+International Women's Health and Human Rights,Stanford University,https://www.coursera.org/learn/womens-health-human-rights,womens-health-human-rights
+Child Nutrition and Cooking,Stanford University,https://www.coursera.org/learn/childnutrition,childnutrition
+Understanding the Brain: The Neurobiology of Everyday Life,The University of Chicago,https://www.coursera.org/learn/neurobiology,neurobiology
+Introduction to Social Media Marketing,Facebook,https://www.coursera.org/learn/social-media-marketing-introduction,social-media-marketing-introduction
+Forensic Accounting and Fraud Examination,West Virginia University,https://www.coursera.org/learn/forensic-accounting,forensic-accounting
+Clinical Terminology for International and U.S. Students,University of Pittsburgh,https://www.coursera.org/learn/clinical-terminology,clinical-terminology
+Science of Exercise,University of Colorado Boulder,https://www.coursera.org/learn/science-exercise,science-exercise
+Digital Product Management: Modern Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-digital-product-management,uva-darden-digital-product-management
+Data Science Math Skills,Duke University,https://www.coursera.org/learn/datasciencemathskills,datasciencemathskills
+Structuring Machine Learning Projects,DeepLearning.AI,https://www.coursera.org/learn/machine-learning-projects,machine-learning-projects
+An Introduction to American Law,University of Pennsylvania,https://www.coursera.org/learn/american-law,american-law
+The Strategy of Content Marketing,"University of California, Davis",https://www.coursera.org/learn/content-marketing,content-marketing
+Introduction to Cybersecurity Tools & Cyber Attacks,IBM,https://www.coursera.org/learn/introduction-cybersecurity-cyber-attacks,introduction-cybersecurity-cyber-attacks
+The Data Scientist’s Toolbox,Johns Hopkins University,https://www.coursera.org/learn/data-scientists-tools,data-scientists-tools
+Animal Behaviour and Welfare,The University of Edinburgh,https://www.coursera.org/learn/animal-welfare,animal-welfare
+Convolutional Neural Networks in TensorFlow,DeepLearning.AI,https://www.coursera.org/learn/convolutional-neural-networks-tensorflow,convolutional-neural-networks-tensorflow
+Positive Psychology: Martin E. P. Seligman’s Visionary Science,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-visionary-science,positive-psychology-visionary-science
+Introduction to the Biology of Cancer,Johns Hopkins University,https://www.coursera.org/learn/cancer,cancer
+Convolutional Neural Networks,DeepLearning.AI,https://www.coursera.org/learn/convolutional-neural-networks,convolutional-neural-networks
+Using Python to Access Web Data,University of Michigan,https://www.coursera.org/learn/python-network-data,python-network-data
+Introductory Human Physiology,Duke University,https://www.coursera.org/learn/physiology,physiology
+Introduction to Systematic Review and Meta-Analysis,Johns Hopkins University,https://www.coursera.org/learn/systematic-review,systematic-review
+Organizational Analysis,Stanford University,https://www.coursera.org/learn/organizational-analysis,organizational-analysis
+Communication Strategies for a Virtual Age,University of Toronto,https://www.coursera.org/learn/communication-strategies-virtual-age,communication-strategies-virtual-age
+Moral Foundations of Politics,Yale University,https://www.coursera.org/learn/moral-politics,moral-politics
+Étudier en France: French Intermediate course B1-B2,École Polytechnique,https://www.coursera.org/learn/etudier-en-france,etudier-en-france
+Managing the Company of the Future,London Business School,https://www.coursera.org/learn/company-future-management,company-future-management
+Finance for Non-Finance Professionals,Rice University,https://www.coursera.org/learn/finance-for-non-finance,finance-for-non-finance
+Site Reliability Engineering: Measuring and Managing Reliability,Google Cloud,https://www.coursera.org/learn/site-reliability-engineering-slos,site-reliability-engineering-slos
+Autism Spectrum Disorder,"University of California, Davis",https://www.coursera.org/learn/autism-spectrum-disorder,autism-spectrum-disorder
+Data Science Methodology,IBM,https://www.coursera.org/learn/data-science-methodology,data-science-methodology
+Introduction to Financial Accounting,University of Pennsylvania,https://www.coursera.org/learn/wharton-accounting,wharton-accounting
+Marketing in a Digital World,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-digital,marketing-digital
+Wind Energy,Technical University of Denmark (DTU),https://www.coursera.org/learn/wind-energy,wind-energy
+Principles of Sustainable Finance,Erasmus University Rotterdam,https://www.coursera.org/learn/sustainable-finance,sustainable-finance
+Financial Engineering and Risk Management Part I,Columbia University,https://www.coursera.org/learn/financial-engineering-1,financial-engineering-1
+Introduction to Philosophy,The University of Edinburgh,https://www.coursera.org/learn/philosophy,philosophy
+Business Metrics for Data-Driven Companies,Duke University,https://www.coursera.org/learn/analytics-business-metrics,analytics-business-metrics
+Python Basics,University of Michigan,https://www.coursera.org/learn/python-basics,python-basics
+Introduction to Sustainability,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/sustainability,sustainability
+Positive Psychiatry and Mental Health,The University of Sydney,https://www.coursera.org/learn/positive-psychiatry,positive-psychiatry
+Cryptography I,Stanford University,https://www.coursera.org/learn/crypto,crypto
+Learning to Teach Online,UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/teach-online,teach-online
+IT Security: Defense against the digital dark arts,Google,https://www.coursera.org/learn/it-security,it-security
+Entreprise et changement climatique,ESSEC Business School,https://www.coursera.org/learn/entreprise-changement-climatique,entreprise-changement-climatique
+An Introduction to Consumer Neuroscience & Neuromarketing,Copenhagen Business School,https://www.coursera.org/learn/neuromarketing,neuromarketing
+Gamification,University of Pennsylvania,https://www.coursera.org/learn/gamification,gamification
+"Divide and Conquer, Sorting and Searching, and Randomized Algorithms",Stanford University,https://www.coursera.org/learn/algorithms-divide-conquer,algorithms-divide-conquer
+Contabilidad para no contadores,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/contabilidad,contabilidad
+Using Python to Interact with the Operating System,Google,https://www.coursera.org/learn/python-operating-system,python-operating-system
+Object-Oriented Data Structures in C++,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/cs-fundamentals-1,cs-fundamentals-1
+Google Cloud Platform Big Data and Machine Learning Fundamentals,Google Cloud,https://www.coursera.org/learn/gcp-big-data-ml-fundamentals,gcp-big-data-ml-fundamentals
+Databases and SQL for Data Science,IBM,https://www.coursera.org/learn/sql-data-science,sql-data-science
+Natural Language Processing in TensorFlow,DeepLearning.AI,https://www.coursera.org/learn/natural-language-processing-tensorflow,natural-language-processing-tensorflow
+"Advanced Valuation and Strategy - M&A, Private Equity, and Venture Capital",Erasmus University Rotterdam,https://www.coursera.org/learn/advanced-valuation-and-strategy,advanced-valuation-and-strategy
+Natural Language Processing with Probabilistic Models,DeepLearning.AI,https://www.coursera.org/learn/probabilistic-models-in-nlp,probabilistic-models-in-nlp
+Vital Signs: Understanding What the Body Is Telling Us,University of Pennsylvania,https://www.coursera.org/learn/vital-signs,vital-signs
+Understanding Research Methods,University of London,https://www.coursera.org/learn/research-methods,research-methods
+IBM Customer Engagement Specialist Professional Certificate,IBM,https://www.coursera.org/learn/ibm-customer-engagement-specialist,ibm-customer-engagement-specialist
+Introduction to Calculus,The University of Sydney,https://www.coursera.org/learn/introduction-to-calculus,introduction-to-calculus
+Camino a la Excelencia en Gestión de Proyectos,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/camino-excelencia-gestion-proyectos,camino-excelencia-gestion-proyectos
+Introduction to HTML5,University of Michigan,https://www.coursera.org/learn/html,html
+Wine Tasting: Sensory Techniques for Wine Analysis,"University of California, Davis",https://www.coursera.org/learn/wine,wine
+Excel Skills for Business: Intermediate I,Macquarie University,https://www.coursera.org/learn/excel-intermediate-1,excel-intermediate-1
+"Programming Foundations with JavaScript, HTML and CSS",Duke University,https://www.coursera.org/learn/duke-programming-web,duke-programming-web
+Build a Modern Computer from First Principles: From Nand to Tetris (Project-Centered Course),Hebrew University of Jerusalem,https://www.coursera.org/learn/build-a-computer,build-a-computer
+Food & Beverage Management,Università Bocconi,https://www.coursera.org/learn/food-beverage-management,food-beverage-management
+Data Analysis with Python,IBM,https://www.coursera.org/learn/data-analysis-with-python,data-analysis-with-python
+Project Planning,"University of California, Irvine",https://www.coursera.org/learn/project-planning-1,project-planning-1
+Agile Meets Design Thinking,University of Virginia,https://www.coursera.org/learn/uva-darden-getting-started-agile,uva-darden-getting-started-agile
+AWS Fundamentals: Going Cloud-Native,Amazon Web Services,https://www.coursera.org/learn/aws-fundamentals-going-cloud-native,aws-fundamentals-going-cloud-native
+Construction Project Management,Columbia University,https://www.coursera.org/learn/construction-project-management,construction-project-management
+Introduction to Mathematical Thinking,Stanford University,https://www.coursera.org/learn/mathematical-thinking,mathematical-thinking
+Everyday Parenting: The ABCs of Child Rearing,Yale University,https://www.coursera.org/learn/everyday-parenting,everyday-parenting
+Introduction to Healthcare,Stanford University,https://www.coursera.org/learn/intro-to-healthcare,intro-to-healthcare
+Machine Learning with Python,IBM,https://www.coursera.org/learn/machine-learning-with-python,machine-learning-with-python
+Terrorism and Counterterrorism: Comparing Theory and Practice,Universiteit Leiden,https://www.coursera.org/learn/terrorism,terrorism
+Data Management for Clinical Research,Vanderbilt University,https://www.coursera.org/learn/clinical-data-management,clinical-data-management
+Sustainable Fashion,Copenhagen Business School,https://www.coursera.org/learn/sustainable-fashion,sustainable-fashion
+Foundations of Data Science: K-Means Clustering in Python,University of London,https://www.coursera.org/learn/data-science-k-means-clustering-python,data-science-k-means-clustering-python
+Instructional Design Foundations and Applications,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/instructional-design-foundations-applications,instructional-design-foundations-applications
+Cursos en línea: modelo para armar,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/enlinea,enlinea
+Modern Art & Ideas,The Museum of Modern Art,https://www.coursera.org/learn/modern-art-ideas,modern-art-ideas
+"Speak English Professionally: In Person, Online & On the Phone",Georgia Institute of Technology,https://www.coursera.org/learn/speak-english-professionally,speak-english-professionally
+Essential Google Cloud Infrastructure: Foundation,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-foundation,gcp-infrastructure-foundation
+Introduction to Artificial Intelligence (AI),IBM,https://www.coursera.org/learn/introduction-to-ai,introduction-to-ai
+Dog Emotion and Cognition,Duke University,https://www.coursera.org/learn/dog-emotion-and-cognition,dog-emotion-and-cognition
+International Leadership and Organizational Behavior,Università Bocconi,https://www.coursera.org/learn/organizational-behavior,organizational-behavior
+Driving business towards the Sustainable Development Goals,Erasmus University Rotterdam,https://www.coursera.org/learn/sdgbusiness,sdgbusiness
+"The Sustainable Development Goals – A global, transdisciplinary vision for the future",University of Copenhagen,https://www.coursera.org/learn/global-sustainable-development,global-sustainable-development
+Digital Transformation,BCG,https://www.coursera.org/learn/bcg-uva-darden-digital-transformation,bcg-uva-darden-digital-transformation
+Sequence Models,DeepLearning.AI,https://www.coursera.org/learn/nlp-sequence-models,nlp-sequence-models
+Devenir entrepreneur du changement,HEC Paris,https://www.coursera.org/learn/entrepreneur-changement,entrepreneur-changement
+Seeing Through Photographs,The Museum of Modern Art,https://www.coursera.org/learn/photography,photography
+Entrepreneurship 1: Developing the Opportunity,University of Pennsylvania,https://www.coursera.org/learn/wharton-entrepreneurship-opportunity,wharton-entrepreneurship-opportunity
+Introduction to Search Engine Optimization,"University of California, Davis",https://www.coursera.org/learn/search-engine-optimization,search-engine-optimization
+Learn to Speak Korean 1,Yonsei University,https://www.coursera.org/learn/learn-speak-korean1,learn-speak-korean1
+Circular Economy - Sustainable Materials Management,Delft University of Technology,https://www.coursera.org/learn/circular-economy,circular-economy
+Drug Development,University of California San Diego,https://www.coursera.org/learn/drug-development,drug-development
+R Programming,Johns Hopkins University,https://www.coursera.org/learn/r-programming,r-programming
+Economics of Money and Banking,Columbia University,https://www.coursera.org/learn/money-banking,money-banking
+Chinese for Beginners,Peking University,https://www.coursera.org/learn/learn-chinese,learn-chinese
+Grammar and Punctuation,"University of California, Irvine",https://www.coursera.org/learn/grammar-punctuation,grammar-punctuation
+Japanese for beginners 1,Saint Petersburg State University,https://www.coursera.org/learn/japanese-1,japanese-1
+Introduction to English Common Law,University of London,https://www.coursera.org/learn/intro-common-law,intro-common-law
+Introduction to Dental Medicine,University of Pennsylvania,https://www.coursera.org/learn/dental-medicine-penn,dental-medicine-penn
+Fundamentals of Reinforcement Learning,Alberta Machine Intelligence Institute,https://www.coursera.org/learn/fundamentals-of-reinforcement-learning,fundamentals-of-reinforcement-learning
+The Power of Macroeconomics: Economic Principles in the Real World,"University of California, Irvine",https://www.coursera.org/learn/principles-of-macroeconomics,principles-of-macroeconomics
+Corporate Sustainability. Understanding and Seizing the Strategic Opportunity,Università Bocconi,https://www.coursera.org/learn/corp-sustainability,corp-sustainability
+Behavioral Finance,Duke University,https://www.coursera.org/learn/duke-behavioral-finance,duke-behavioral-finance
+"Sequences, Time Series and Prediction",DeepLearning.AI,https://www.coursera.org/learn/tensorflow-sequences-time-series-and-prediction,tensorflow-sequences-time-series-and-prediction
+Supply Chain Logistics,Rutgers the State University of New Jersey,https://www.coursera.org/learn/supply-chain-logistics,supply-chain-logistics
+Project Execution,"University of California, Irvine",https://www.coursera.org/learn/project-execution,project-execution
+Nutrición y obesidad: control de sobrepeso,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/nutricion-obesidad-sobrepeso,nutricion-obesidad-sobrepeso
+Microeconomics Principles,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/microeconomics,microeconomics
+Creative Writing: The Craft of Plot,Wesleyan University,https://www.coursera.org/learn/craft-of-plot,craft-of-plot
+Astronomy: Exploring Time and Space,University of Arizona,https://www.coursera.org/learn/astro,astro
+Oil & Gas Industry Operations and Markets,Duke University,https://www.coursera.org/learn/oilandgas,oilandgas
+Design Thinking for Innovation,University of Virginia,https://www.coursera.org/learn/uva-darden-design-thinking-innovation,uva-darden-design-thinking-innovation
+EDIVET: Do you have what it takes to be a veterinarian?,The University of Edinburgh,https://www.coursera.org/learn/becoming-a-veterinarian,becoming-a-veterinarian
+Learn to Program: The Fundamentals,University of Toronto,https://www.coursera.org/learn/learn-to-program,learn-to-program
+Financial Accounting Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-financial-accounting,uva-darden-financial-accounting
+Finding Purpose and Meaning In Life: Living for What Matters Most,University of Michigan,https://www.coursera.org/learn/finding-purpose-and-meaning-in-life,finding-purpose-and-meaning-in-life
+Understanding Clinical Research: Behind the Statistics,University of Cape Town,https://www.coursera.org/learn/clinical-research,clinical-research
+Epidemiology: The Basic Science of Public Health,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/epidemiology,epidemiology
+Fashion as Design,The Museum of Modern Art,https://www.coursera.org/learn/fashion-design,fashion-design
+Teamwork Skills: Communicating Effectively in Groups,University of Colorado Boulder,https://www.coursera.org/learn/teamwork-skills-effective-communication,teamwork-skills-effective-communication
+Feminism and Social Justice,"University of California, Santa Cruz",https://www.coursera.org/learn/feminism-social-justice,feminism-social-justice
+International Organizations Management,University of Geneva,https://www.coursera.org/learn/international-organizations-management,international-organizations-management
+Marketing Digital,Universidade de São Paulo,https://www.coursera.org/learn/estrategia-marketing-digital,estrategia-marketing-digital
+Fundamentals of GIS,"University of California, Davis",https://www.coursera.org/learn/gis,gis
+e-Learning Ecologies: Innovative Approaches to Teaching and Learning for the Digital Age,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/elearning,elearning
+"Excel/VBA for Creative Problem Solving, Part 1",University of Colorado Boulder,https://www.coursera.org/learn/excel-vba-for-creative-problem-solving-part-1,excel-vba-for-creative-problem-solving-part-1
+Rethinking International Tax Law,Universiteit Leiden,https://www.coursera.org/learn/international-taxation,international-taxation
+Introduction to Probability and Data with R,Duke University,https://www.coursera.org/learn/probability-intro,probability-intro
+Understanding and Visualizing Data with Python,University of Michigan,https://www.coursera.org/learn/understanding-visualization-data,understanding-visualization-data
+Fundamentals of Visualization with Tableau,"University of California, Davis",https://www.coursera.org/learn/data-visualization-tableau,data-visualization-tableau
+Getting Started with SAS Programming,SAS,https://www.coursera.org/learn/sas-programming-basics,sas-programming-basics
+Machine Learning for All,University of London,https://www.coursera.org/learn/uol-machine-learning-for-all,uol-machine-learning-for-all
+Using Databases with Python,University of Michigan,https://www.coursera.org/learn/python-databases,python-databases
+Addiction Treatment: Clinical Skills for Healthcare Providers,Yale University,https://www.coursera.org/learn/addiction-treatment,addiction-treatment
+Dino 101: Dinosaur Paleobiology,University of Alberta,https://www.coursera.org/learn/dino101,dino101
+Sports Marketing,Northwestern University,https://www.coursera.org/learn/sports-marketing,sports-marketing
+Positive Psychology,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/positive-psychology,positive-psychology
+Introduction to Programming with MATLAB,Vanderbilt University,https://www.coursera.org/learn/matlab,matlab
+Preparing to Manage Human Resources,University of Minnesota,https://www.coursera.org/learn/managing-human-resources,managing-human-resources
+Solar Energy Basics,The State University of New York,https://www.coursera.org/learn/solar-energy-basics,solar-energy-basics
+Front-End Web UI Frameworks and Tools: Bootstrap 4,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/bootstrap-4,bootstrap-4
+Building Scalable Java Microservices with Spring Boot and Spring Cloud,Google Cloud,https://www.coursera.org/learn/google-cloud-java-spring,google-cloud-java-spring
+Introduction to Forensic Science,"Nanyang Technological University, Singapore",https://www.coursera.org/learn/forensic-science,forensic-science
+Google Cloud Product Fundamentals,Google Cloud,https://www.coursera.org/learn/google-cloud-product-fundamentals,google-cloud-product-fundamentals
+American Contract Law I,Yale University,https://www.coursera.org/learn/contracts-1,contracts-1
+Engineering Health: Introduction to Yoga and Physiology,New York University,https://www.coursera.org/learn/engineering-health-yoga-physiology,engineering-health-yoga-physiology
+AI for Medical Diagnosis,DeepLearning.AI,https://www.coursera.org/learn/ai-for-medical-diagnosis,ai-for-medical-diagnosis
+Natural Language Processing with Sequence Models,DeepLearning.AI,https://www.coursera.org/learn/sequence-models-in-nlp,sequence-models-in-nlp
+Introduction to Electronics,Georgia Institute of Technology,https://www.coursera.org/learn/electronics,electronics
+International Humanitarian Law in Theory and Practice,Universiteit Leiden,https://www.coursera.org/learn/international-humanitarian-law,international-humanitarian-law
+Making Architecture,IE School of Architecture & Design,https://www.coursera.org/learn/making-architecture,making-architecture
+Model Thinking,University of Michigan,https://www.coursera.org/learn/model-thinking,model-thinking
+Supporting children with difficulties in reading and writing,University of London,https://www.coursera.org/learn/dyslexia-difficulties,dyslexia-difficulties
+Innovation Management,Erasmus University Rotterdam,https://www.coursera.org/learn/innovation-management,innovation-management
+The Manager's Toolkit: A Practical Guide to Managing People at Work,"Birkbeck, University of London",https://www.coursera.org/learn/people-management,people-management
+"The Modern World, Part One: Global History from 1760 to 1910",University of Virginia,https://www.coursera.org/learn/modern-world,modern-world
+Fundamentals of Music Theory,The University of Edinburgh,https://www.coursera.org/learn/edinburgh-music-theory,edinburgh-music-theory
+Supply Chain Principles,Georgia Institute of Technology,https://www.coursera.org/learn/supply-chain-principles,supply-chain-principles
+Essential Google Cloud Infrastructure: Core Services,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-core-services,gcp-infrastructure-core-services
+Weight Management: Beyond Balancing Calories,Emory University,https://www.coursera.org/learn/weight-management-beyond-balancing-calories,weight-management-beyond-balancing-calories
+Miracles of Human Language: An Introduction to Linguistics,Universiteit Leiden,https://www.coursera.org/learn/human-language,human-language
+Java Programming: Solving Problems with Software,Duke University,https://www.coursera.org/learn/java-programming,java-programming
+Race and Cultural Diversity in American Life and History,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/race-cultural-diversity-american-life,race-cultural-diversity-american-life
+Inspiring and Motivating Individuals,University of Michigan,https://www.coursera.org/learn/motivate-people-teams,motivate-people-teams
+"Competencias digitales. Herramientas de ofimática (Microsoft Word, Excel, Power Point)",Universitat Autònoma de Barcelona,https://www.coursera.org/learn/competencias-digitales-ofimatica,competencias-digitales-ofimatica
+Healing with the Arts,University of Florida,https://www.coursera.org/learn/healing-with-the-arts,healing-with-the-arts
+People Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-people-analytics,wharton-people-analytics
+What is Social?,Northwestern University,https://www.coursera.org/learn/what-is-social,what-is-social
+UX Design Fundamentals,California Institute of the Arts,https://www.coursera.org/learn/ux-design-fundamentals,ux-design-fundamentals
+Creative Thinking: Techniques and Tools for Success,Imperial College London,https://www.coursera.org/learn/creative-thinking-techniques-and-tools-for-success,creative-thinking-techniques-and-tools-for-success
+Introduction to Classical Music,Yale University,https://www.coursera.org/learn/introclassicalmusic,introclassicalmusic
+Children's Human Rights - An Interdisciplinary Introduction,University of Geneva,https://www.coursera.org/learn/childrens-rights,childrens-rights
+Investment Management in an Evolving and Volatile World by HEC Paris and AXA Investment Managers,HEC Paris,https://www.coursera.org/learn/investment-management,investment-management
+Introduction to Data Analysis Using Excel,Rice University,https://www.coursera.org/learn/excel-data-analysis,excel-data-analysis
+Mind Control: Managing Your Mental Health During COVID-19,University of Toronto,https://www.coursera.org/learn/manage-health-covid-19,manage-health-covid-19
+Introduction to International Criminal Law,Case Western Reserve University,https://www.coursera.org/learn/international-criminal-law,international-criminal-law
+"FinTech: Foundations, Payments, and Regulations",University of Pennsylvania,https://www.coursera.org/learn/wharton-fintech-overview-payments-regulations,wharton-fintech-overview-payments-regulations
+Greek and Roman Mythology,University of Pennsylvania,https://www.coursera.org/learn/mythology,mythology
+Politics and Economics of International Energy,Sciences Po,https://www.coursera.org/learn/global-energy,global-energy
+Continuous Delivery & DevOps,University of Virginia,https://www.coursera.org/learn/uva-darden-continous-delivery-devops,uva-darden-continous-delivery-devops
+Teach English Now! Foundational Principles,Arizona State University,https://www.coursera.org/learn/english-principles,english-principles
+Business Model Innovation,HEC Paris,https://www.coursera.org/learn/business-model,business-model
+Introduction to User Experience Principles and Processes,University of Michigan,https://www.coursera.org/learn/introtoux-principles-and-processes,introtoux-principles-and-processes
+Beyond the Sustainable Development Goals (SDGs): Addressing Sustainability and Development,University of Michigan,https://www.coursera.org/learn/beyond-the-sustainable-development-goals-addressing-sustainability-and-development,beyond-the-sustainable-development-goals-addressing-sustainability-and-development
+Process Mining: Data science in Action,Eindhoven University of Technology,https://www.coursera.org/learn/process-mining,process-mining
+Fundamentals of Immunology: Innate Immunity and B-Cell Function,Rice University,https://www.coursera.org/learn/immunologyfundamentalsimmunitybcells,immunologyfundamentalsimmunitybcells
+Introduction to Corporate Finance,University of Pennsylvania,https://www.coursera.org/learn/wharton-finance,wharton-finance
+Global Diplomacy: the United Nations in the World,University of London,https://www.coursera.org/learn/global-diplomacy-un,global-diplomacy-un
+Algorithmic Toolbox,University of California San Diego,https://www.coursera.org/learn/algorithmic-toolbox,algorithmic-toolbox
+Troubles du spectre de l'autisme : diagnostic,University of Geneva,https://www.coursera.org/learn/troubles-spectre-autisme-diagnostic,troubles-spectre-autisme-diagnostic
+Anatomy: Musculoskeletal and Integumentary Systems,University of Michigan,https://www.coursera.org/learn/anatomy403-1x,anatomy403-1x
+Unraveling the Cycling City,University of Amsterdam,https://www.coursera.org/learn/unraveling-the-cycling-city,unraveling-the-cycling-city
+A Crash Course in Causality:  Inferring Causal Effects from Observational Data,University of Pennsylvania,https://www.coursera.org/learn/crash-course-in-causality,crash-course-in-causality
+English for Business and Entrepreneurship,University of Pennsylvania,https://www.coursera.org/learn/business,business
+Natural Language Processing with Attention Models,DeepLearning.AI,https://www.coursera.org/learn/attention-models-in-nlp,attention-models-in-nlp
+What is Compliance?,University of Pennsylvania,https://www.coursera.org/learn/what-is-compliance,what-is-compliance
+Getting Started with Google Sheets,Google Cloud,https://www.coursera.org/learn/getting-started-with-google-sheets,getting-started-with-google-sheets
+Data Visualization with Python,IBM,https://www.coursera.org/learn/python-for-data-visualization,python-for-data-visualization
+Foundations of Mindfulness,Rice University,https://www.coursera.org/learn/foundations-of-mindfulness,foundations-of-mindfulness
+Negociación exitosa: Estrategias y habilidades esenciales (en español),University of Michigan,https://www.coursera.org/learn/negociacion,negociacion
+Data-driven Decision Making,PwC,https://www.coursera.org/learn/decision-making,decision-making
+Fundamentals of Engineering Exam Review,Georgia Institute of Technology,https://www.coursera.org/learn/fe-exam,fe-exam
+Gender and Sexuality: Diversity and Inclusion in the Workplace,University of Pittsburgh,https://www.coursera.org/learn/gender-sexuality,gender-sexuality
+Managerial Accounting Fundamentals,University of Virginia,https://www.coursera.org/learn/uva-darden-managerial-accounting,uva-darden-managerial-accounting
+Search Engine Optimization Fundamentals,"University of California, Davis",https://www.coursera.org/learn/seo-fundamentals,seo-fundamentals
+Essentials of Global Health,Yale University,https://www.coursera.org/learn/essentials-global-health,essentials-global-health
+International Security Management,Erasmus University Rotterdam,https://www.coursera.org/learn/international-security-management,international-security-management
+Getting Started with AWS Machine Learning,Amazon Web Services,https://www.coursera.org/learn/aws-machine-learning,aws-machine-learning
+Arts and Heritage Management,Università Bocconi,https://www.coursera.org/learn/arts-heritage,arts-heritage
+Understanding Einstein: The Special Theory of Relativity,Stanford University,https://www.coursera.org/learn/einstein-relativity,einstein-relativity
+Réussir le Changement,ESSEC Business School,https://www.coursera.org/learn/reussir-le-changement,reussir-le-changement
+Equine Welfare and Management,"University of California, Davis",https://www.coursera.org/learn/equine,equine
+International migrations: a global issue,Sciences Po,https://www.coursera.org/learn/international-migrations,international-migrations
+Introduction to Web Development,"University of California, Davis",https://www.coursera.org/learn/web-development,web-development
+Writing and Editing: Word Choice and Word Order,University of Michigan,https://www.coursera.org/learn/writing-editing-words,writing-editing-words
+Introduction to the Digital Advertising Landscape,University of Colorado Boulder,https://www.coursera.org/learn/digital-advertising-landscape,digital-advertising-landscape
+Access Controls,(ISC)²,https://www.coursera.org/learn/access-control-sscp,access-control-sscp
+Engineering Project Management: Initiating and Planning,Rice University,https://www.coursera.org/learn/initiating-planning,initiating-planning
+Kotlin for Java Developers,JetBrains,https://www.coursera.org/learn/kotlin-for-java-developers,kotlin-for-java-developers
+Mathematics for Machine Learning: Multivariate Calculus,Imperial College London,https://www.coursera.org/learn/multivariate-calculus-machine-learning,multivariate-calculus-machine-learning
+Introduction to Git and GitHub,Google,https://www.coursera.org/learn/introduction-git-github,introduction-git-github
+Industrial Biotechnology,University of Manchester   ,https://www.coursera.org/learn/industrial-biotech,industrial-biotech
+The Addicted Brain,Emory University,https://www.coursera.org/learn/addiction-and-the-brain,addiction-and-the-brain
+Introducción a la programación en Python I: Aprendiendo a programar con Python,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/aprendiendo-programar-python,aprendiendo-programar-python
+Modernizing Data Lakes and Data Warehouses with GCP,Google Cloud,https://www.coursera.org/learn/data-lakes-data-warehouses-gcp,data-lakes-data-warehouses-gcp
+Drug Discovery,University of California San Diego,https://www.coursera.org/learn/drug-discovery,drug-discovery
+Nutrition and Lifestyle in Pregnancy,Ludwig-Maximilians-Universität München (LMU),https://www.coursera.org/learn/nutrition-pregnancy,nutrition-pregnancy
+Financial Acumen for Non-Financial Managers,University of Pennsylvania,https://www.coursera.org/learn/finance-healthcare-managers,finance-healthcare-managers
+Python and Statistics for Financial Analysis,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/python-statistics-financial-analysis,python-statistics-financial-analysis
+Bugs 101: Insect-Human Interactions,University of Alberta,https://www.coursera.org/learn/bugs-101,bugs-101
+Autodesk Certified Professional: Revit for Architectural Design Exam Prep,Autodesk,https://www.coursera.org/learn/autodesk-revit-architectural-design,autodesk-revit-architectural-design
+"Leading for Equity, Diversity and Inclusion in Higher Education",University of Michigan,https://www.coursera.org/learn/leading-for-equity-diversity-inclusion,leading-for-equity-diversity-inclusion
+Digital Media and Marketing Strategies,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-plan,marketing-plan
+Enterprise Architecture,Peter the Great St. Petersburg Polytechnic University,https://www.coursera.org/learn/enterprise-architecture,enterprise-architecture
+Introduction to Spreadsheets and Models,University of Pennsylvania,https://www.coursera.org/learn/wharton-introduction-spreadsheets-models,wharton-introduction-spreadsheets-models
+The Arts and Science of Relationships: Understanding Human Needs,University of Toronto,https://www.coursera.org/learn/human-needs,human-needs
+Essentials in Clinical Simulations Across the Health Professions,The George Washington University,https://www.coursera.org/learn/clinicalsimulations,clinicalsimulations
+Budgeting and Scheduling Projects,"University of California, Irvine",https://www.coursera.org/learn/schedule-projects,schedule-projects
+Machine Learning for Business Professionals,Google Cloud,https://www.coursera.org/learn/machine-learning-business-professionals,machine-learning-business-professionals
+Introduction to Accounting Data Analytics and Visualization,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/intro-accounting-data-analytics-visual,intro-accounting-data-analytics-visual
+Spanish Vocabulary: Meeting People,"University of California, Davis",https://www.coursera.org/learn/spanish-vocabulary-meeting-people,spanish-vocabulary-meeting-people
+Gestión Empresarial Exitosa para Pymes,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/gestionempresarialpyme,gestionempresarialpyme
+Public Policy Challenges of the 21st Century,University of Virginia,https://www.coursera.org/learn/public-policy,public-policy
+International Law in Action: the Arbitration of International Disputes,Universiteit Leiden,https://www.coursera.org/learn/arbitration-international-disputes,arbitration-international-disputes
+Introduction to Ancient Egypt and Its Civilization,University of Pennsylvania,https://www.coursera.org/learn/introancientegypt,introancientegypt
+Financing and Investing in Infrastructure,Università Bocconi,https://www.coursera.org/learn/infrastructure-investing,infrastructure-investing
+Global Environmental Management,Technical University of Denmark (DTU),https://www.coursera.org/learn/global-environmental-management,global-environmental-management
+Operations Analytics,University of Pennsylvania,https://www.coursera.org/learn/wharton-operations-analytics,wharton-operations-analytics
+Entrepreneurship Strategy: From Ideation to Exit,HEC Paris,https://www.coursera.org/learn/entrepreneurship-strategy,entrepreneurship-strategy
+FinTech Law and Policy,Duke University,https://www.coursera.org/learn/fintechlawandpolicy,fintechlawandpolicy
+The Social Context of Mental Health and Illness,University of Toronto,https://www.coursera.org/learn/mental-health,mental-health
+What Is Contemporary Art?,The Museum of Modern Art,https://www.coursera.org/learn/contemporary-art,contemporary-art
+The Art of Music Production,Berklee College of Music,https://www.coursera.org/learn/producing-music,producing-music
+Biohacking Your Brain's Health,Emory University,https://www.coursera.org/learn/biohacking-your-brains-health,biohacking-your-brains-health
+Bayesian Statistics: From Concept to Data Analysis,"University of California, Santa Cruz",https://www.coursera.org/learn/bayesian-statistics,bayesian-statistics
+Reporting extra-financier et stratégie RSE,ESSEC Business School,https://www.coursera.org/learn/reporting-extra-financier-strategie-rse,reporting-extra-financier-strategie-rse
+Leading Healthcare Quality and Safety,The George Washington University,https://www.coursera.org/learn/quality-healthcare,quality-healthcare
+Understanding International Relations Theory,National Research University Higher School of Economics,https://www.coursera.org/learn/international-relations-theory,international-relations-theory
+Introduction to Data Analytics,IBM,https://www.coursera.org/learn/introduction-to-data-analytics,introduction-to-data-analytics
+Fundamentos de Excel para Negocios,Universidad Austral,https://www.coursera.org/learn/excel-para-negocios,excel-para-negocios
+Elastic Google Cloud Infrastructure: Scaling and Automation,Google Cloud,https://www.coursera.org/learn/gcp-infrastructure-scaling-automation,gcp-infrastructure-scaling-automation
+Cultural Competence - Aboriginal Sydney,The University of Sydney,https://www.coursera.org/learn/cultural-competence-aboriginal-sydney,cultural-competence-aboriginal-sydney
+Fundamentos de Finanzas Empresariales,Universidad de los Andes,https://www.coursera.org/learn/finanzas-empresariales,finanzas-empresariales
+Greening the Economy: Sustainable Cities,Lund University,https://www.coursera.org/learn/gte-sustainable-cities,gte-sustainable-cities
+Introduction to Engineering Mechanics,Georgia Institute of Technology,https://www.coursera.org/learn/engineering-mechanics-statics,engineering-mechanics-statics
+Design-Led Strategy: Design thinking for business strategy and entrepreneurship,The University of Sydney,https://www.coursera.org/learn/design-strategy,design-strategy
+Biology Meets Programming: Bioinformatics for Beginners,University of California San Diego,https://www.coursera.org/learn/bioinformatics,bioinformatics
+Understanding Medical Research: Your Facebook Friend is Wrong,Yale University,https://www.coursera.org/learn/medical-research,medical-research
+Health Behavior Change: From Evidence to Action,Yale University,https://www.coursera.org/learn/health-behavior-change,health-behavior-change
+Ordered Data Structures,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/cs-fundamentals-2,cs-fundamentals-2
+Mindshift: Break Through Obstacles to Learning and Discover Your Hidden Potential,McMaster University,https://www.coursera.org/learn/mindshift,mindshift
+Programming Fundamentals,Duke University,https://www.coursera.org/learn/programming-fundamentals,programming-fundamentals
+Understanding Financial Markets,University of Geneva,https://www.coursera.org/learn/understanding-financial-markets,understanding-financial-markets
+In the Studio: Postwar Abstract Painting,The Museum of Modern Art,https://www.coursera.org/learn/painting,painting
+Drug Commercialization,University of California San Diego,https://www.coursera.org/learn/drug-commercialization,drug-commercialization
+Introduction to Software Product Management,University of Alberta,https://www.coursera.org/learn/introduction-to-software-product-management,introduction-to-software-product-management
+"Social Norms, Social Change I",Unicef,https://www.coursera.org/learn/norms,norms
+Excel Skills for Business: Intermediate II,Macquarie University,https://www.coursera.org/learn/excel-intermediate-2,excel-intermediate-2
+Aboriginal Worldviews and Education,University of Toronto,https://www.coursera.org/learn/aboriginal-education,aboriginal-education
+"Information Systems Auditing, Controls and Assurance",The Hong Kong University of Science and Technology,https://www.coursera.org/learn/information-systems-audit,information-systems-audit
+Six Sigma Principles,University System of Georgia,https://www.coursera.org/learn/six-sigma-principles,six-sigma-principles
+Business Writing,University of Colorado Boulder,https://www.coursera.org/learn/writing-for-business,writing-for-business
+Autodesk Certified Professional: AutoCAD for Design and Drafting Exam Prep,Autodesk,https://www.coursera.org/learn/autodesk-autocad-design-drafting,autodesk-autocad-design-drafting
+Introduction to Typography,California Institute of the Arts,https://www.coursera.org/learn/typography,typography
+Customer Segmentation and Prospecting,Northwestern University,https://www.coursera.org/learn/customer-segmentation-prospecting,customer-segmentation-prospecting
+Claves para Gestionar Personas,IESE Business School,https://www.coursera.org/learn/gestionar-personas,gestionar-personas
+English for Journalism,University of Pennsylvania,https://www.coursera.org/learn/journalism,journalism
+How Things Work: An Introduction to Physics,University of Virginia,https://www.coursera.org/learn/how-things-work,how-things-work
+Business English: Networking,University of Washington,https://www.coursera.org/learn/business-english-intro,business-english-intro
+Summary Statistics in Public Health,Johns Hopkins University,https://www.coursera.org/learn/summary-statistics,summary-statistics
+The Changing Global Order,Universiteit Leiden,https://www.coursera.org/learn/changing-global-order,changing-global-order
+Global Energy and Climate Policy,University of London,https://www.coursera.org/learn/globalenergyandclimatepolicy,globalenergyandclimatepolicy
+El Abogado del Futuro: Legaltech y la Transformación Digital del Derecho,Universidad Austral,https://www.coursera.org/learn/legaltech,legaltech
+Probability and Statistics: To p or not to p?,University of London,https://www.coursera.org/learn/probability-statistics,probability-statistics
+Gut Check: Exploring Your Microbiome,University of Colorado Boulder,https://www.coursera.org/learn/microbiome,microbiome
+Econometrics: Methods and Applications,Erasmus University Rotterdam,https://www.coursera.org/learn/erasmus-econometrics,erasmus-econometrics
+Разработка веб-сервисов на Go - основы языка,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/golang-webservices-1,golang-webservices-1
+Mastering Data Analysis in Excel,Duke University,https://www.coursera.org/learn/analytics-excel,analytics-excel
+Basic Statistics,University of Amsterdam,https://www.coursera.org/learn/basic-statistics,basic-statistics
+"Capstone: Retrieving, Processing, and Visualizing Data with Python",University of Michigan,https://www.coursera.org/learn/python-data-visualization,python-data-visualization
+Design Thinking for the Greater Good: Innovation in the Social Sector,University of Virginia,https://www.coursera.org/learn/uva-darden-design-thinking-social-sector,uva-darden-design-thinking-social-sector
+Introduction to Portfolio Construction and Analysis with Python,EDHEC Business School,https://www.coursera.org/learn/introduction-portfolio-construction-python,introduction-portfolio-construction-python
+Data Analytics for Lean Six Sigma,University of Amsterdam,https://www.coursera.org/learn/data-analytics-for-lean-six-sigma,data-analytics-for-lean-six-sigma
+Refugees in the 21st Century,University of London,https://www.coursera.org/learn/refugees-21st-century,refugees-21st-century
+Building Containerized Applications on AWS,Amazon Web Services,https://www.coursera.org/learn/containerized-apps-on-aws,containerized-apps-on-aws
+Business Transformation with Google Cloud,Google Cloud,https://www.coursera.org/learn/business-transformation-google-cloud,business-transformation-google-cloud
+Version Control with Git,Atlassian,https://www.coursera.org/learn/version-control-with-git,version-control-with-git
+"Transmedia Storytelling: Narrative worlds, emerging technologies, and global audiences",UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/transmedia-storytelling,transmedia-storytelling
+Excel aplicado a los negocios (Nivel Avanzado),Universidad Austral,https://www.coursera.org/learn/excel-aplicado-negocios-avanzado,excel-aplicado-negocios-avanzado
+Introduction to Public Speaking,University of Washington,https://www.coursera.org/learn/public-speaking,public-speaking
+Building Conversational Experiences with Dialogflow,Google Cloud,https://www.coursera.org/learn/conversational-experiences-dialogflow,conversational-experiences-dialogflow
+Guitar for Beginners,Berklee College of Music,https://www.coursera.org/learn/guitar,guitar
+Managing Project Risks and Changes,"University of California, Irvine",https://www.coursera.org/learn/project-risk-management,project-risk-management
+L'excellence opérationnelle en pratique,ESSEC Business School,https://www.coursera.org/learn/excellence-operationnelle,excellence-operationnelle
+Introduction to Cloud Computing,IBM,https://www.coursera.org/learn/introduction-to-cloud,introduction-to-cloud
+Sample-based Learning Methods,Alberta Machine Intelligence Institute,https://www.coursera.org/learn/sample-based-learning-methods,sample-based-learning-methods
+Functional Programming Principles in Scala,École Polytechnique Fédérale de Lausanne,https://www.coursera.org/learn/progfun1,progfun1
+Introduction to Blockchain Technologies,INSEAD,https://www.coursera.org/learn/introduction-blockchain-technologies,introduction-blockchain-technologies
+Introduction to Environmental Law and Policy,The University of North Carolina at Chapel Hill,https://www.coursera.org/learn/environmental-law,environmental-law
+"Cameras, Exposure, and Photography",Michigan State University,https://www.coursera.org/learn/exposure-photography,exposure-photography
+Democracia y decisiones públicas. Introducción al análisis de políticas públicas,Universitat Autònoma de Barcelona,https://www.coursera.org/learn/democracia,democracia
+Dentistry 101,University of Michigan,https://www.coursera.org/learn/dentistry101,dentistry101
+"Python Functions, Files, and Dictionaries",University of Michigan,https://www.coursera.org/learn/python-functions-files-dictionaries,python-functions-files-dictionaries
+Anticorrupción: Introducción a conceptos y perspectiva práctica,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/anticorrupcion-introduccion,anticorrupcion-introduccion
+Positive Psychology: Applications and Interventions,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-applications,positive-psychology-applications
+Introduction to Embedded Systems Software and Development Environments,University of Colorado Boulder,https://www.coursera.org/learn/introduction-embedded-systems,introduction-embedded-systems
+Personal & Family Financial Planning,University of Florida,https://www.coursera.org/learn/family-planning,family-planning
+A Law Student's Toolkit,Yale University,https://www.coursera.org/learn/law-student,law-student
+Introducción a Data Science: Programación Estadística con R,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/intro-data-science-programacion-estadistica-r,intro-data-science-programacion-estadistica-r
+"Cybersecurity Roles, Processes & Operating System Security",IBM,https://www.coursera.org/learn/cybersecurity-roles-processes-operating-system-security,cybersecurity-roles-processes-operating-system-security
+Computational Neuroscience,University of Washington,https://www.coursera.org/learn/computational-neuroscience,computational-neuroscience
+De-Mystifying Mindfulness,Universiteit Leiden,https://www.coursera.org/learn/mindfulness,mindfulness
+Smart Cities – Management of Smart Urban Infrastructures,École Polytechnique Fédérale de Lausanne,https://www.coursera.org/learn/smart-cities,smart-cities
+Getting Started with Go,"University of California, Irvine",https://www.coursera.org/learn/golang-getting-started,golang-getting-started
+Introduction to Economic Theories,Erasmus University Rotterdam,https://www.coursera.org/learn/intro-economic-theories,intro-economic-theories
+Probabilistic Graphical Models 1: Representation,Stanford University,https://www.coursera.org/learn/probabilistic-graphical-models,probabilistic-graphical-models
+The Power of Microeconomics: Economic Principles in the Real World,"University of California, Irvine",https://www.coursera.org/learn/principles-of-microeconomics,principles-of-microeconomics
+Introduction to Personal Branding,University of Virginia,https://www.coursera.org/learn/personal-branding,personal-branding
+Love as a Force for Social Justice,Stanford University,https://www.coursera.org/learn/love-social-justice,love-social-justice
+Mathematical Thinking in Computer Science,University of California San Diego,https://www.coursera.org/learn/what-is-a-proof,what-is-a-proof
+Introduction to Genetics and Evolution,Duke University,https://www.coursera.org/learn/genetics-evolution,genetics-evolution
+Основы программирования на Python,National Research University Higher School of Economics,https://www.coursera.org/learn/python-osnovy-programmirovaniya,python-osnovy-programmirovaniya
+Improving Communication Skills,University of Pennsylvania,https://www.coursera.org/learn/wharton-communication-skills,wharton-communication-skills
+"Introduction to Trading, Machine Learning & GCP",New York Institute of Finance,https://www.coursera.org/learn/introduction-trading-machine-learning-gcp,introduction-trading-machine-learning-gcp
+Python Programming: A Concise Introduction,Wesleyan University,https://www.coursera.org/learn/python-programming-introduction,python-programming-introduction
+"The Modern World, Part Two: Global History since 1910",University of Virginia,https://www.coursera.org/learn/modern-world-2,modern-world-2
+Understanding Plants - Part I: What a Plant Knows,Tel Aviv University,https://www.coursera.org/learn/plantknows,plantknows
+Excel Fundamentals for Data Analysis,Macquarie University,https://www.coursera.org/learn/excel-data-analysis-fundamentals,excel-data-analysis-fundamentals
+Finanzas personales,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/finanzas-personales,finanzas-personales
+English Composition I,Duke University,https://www.coursera.org/learn/english-composition,english-composition
+Career 911: Your Future Job in Medicine and Healthcare,Northwestern University,https://www.coursera.org/learn/healthcarejobs,healthcarejobs
+Introduction to Self-Driving Cars,University of Toronto,https://www.coursera.org/learn/intro-self-driving-cars,intro-self-driving-cars
+Corporate & Commercial Law I: Contracts & Employment Law,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/corporate-commercial-law-part1,corporate-commercial-law-part1
+Blockchain Basics,University at Buffalo,https://www.coursera.org/learn/blockchain-basics,blockchain-basics
+Foundations of Business Strategy,University of Virginia,https://www.coursera.org/learn/uva-darden-foundations-business-strategy,uva-darden-foundations-business-strategy
+Introdução à Ciência da Computação com Python Parte 1,Universidade de São Paulo,https://www.coursera.org/learn/ciencia-computacao-python-conceitos,ciencia-computacao-python-conceitos
+Stochastic processes,National Research University Higher School of Economics,https://www.coursera.org/learn/stochasticprocesses,stochasticprocesses
+Foundations for Big Data Analysis with SQL,Cloudera,https://www.coursera.org/learn/foundations-big-data-analysis-sql,foundations-big-data-analysis-sql
+"Innovation Through Design: Think, Make, Break, Repeat",The University of Sydney,https://www.coursera.org/learn/innovation-through-design,innovation-through-design
+Perfect Tenses and Modals,"University of California, Irvine",https://www.coursera.org/learn/perfect-tenses-modals,perfect-tenses-modals
+Getting Started with Azure,LearnQuest,https://www.coursera.org/learn/cloud-azure-intro,cloud-azure-intro
+Moralities of Everyday Life,Yale University,https://www.coursera.org/learn/moralities,moralities
+Revisão Sistemática e Meta-análise,Universidade Estadual de Campinas,https://www.coursera.org/learn/revisao-sistematica,revisao-sistematica
+Understanding child development: from synapse to society,Utrecht University,https://www.coursera.org/learn/child-development,child-development
+Introduction to G Suite,Google Cloud,https://www.coursera.org/learn/introduction-g-suite,introduction-g-suite
+Aprendiendo a aprender: Poderosas herramientas mentales con las que podrás dominar temas difíciles (Learning How to Learn),University of California San Diego,https://www.coursera.org/learn/aprendiendo-a-aprender,aprendiendo-a-aprender
+Building Batch Data Pipelines on GCP,Google Cloud,https://www.coursera.org/learn/batch-data-pipelines-gcp,batch-data-pipelines-gcp
+Financial Planning for Young Adults,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/financial-planning,financial-planning
+Quantitative Methods,University of Amsterdam,https://www.coursera.org/learn/quantitative-methods,quantitative-methods
+"Introduction to Self-Determination Theory: An approach to motivation, development and wellness",University of Rochester,https://www.coursera.org/learn/self-determination-theory,self-determination-theory
+The Technology of Music Production,Berklee College of Music,https://www.coursera.org/learn/technology-of-music-production,technology-of-music-production
+Code Yourself! An Introduction to Programming,The University of Edinburgh,https://www.coursera.org/learn/intro-programming,intro-programming
+Success,University of Pennsylvania,https://www.coursera.org/learn/wharton-success,wharton-success
+Chemicals and Health,Johns Hopkins University,https://www.coursera.org/learn/chemicals-health,chemicals-health
+Improving your statistical inferences,Eindhoven University of Technology,https://www.coursera.org/learn/statistical-inferences,statistical-inferences
+Fundamentals of Finance,University of Pennsylvania,https://www.coursera.org/learn/finance-fundamentals,finance-fundamentals
+How Google does Machine Learning,Google Cloud,https://www.coursera.org/learn/google-machine-learning,google-machine-learning
+Object-Oriented Design,University of Alberta,https://www.coursera.org/learn/object-oriented-design,object-oriented-design
+Introduction to Intellectual Property,University of Pennsylvania,https://www.coursera.org/learn/introduction-intellectual-property,introduction-intellectual-property
+Cost and Economics in Pricing Strategy,BCG,https://www.coursera.org/learn/uva-darden-bcg-pricing-strategy-cost-economics,uva-darden-bcg-pricing-strategy-cost-economics
+Write A Feature Length Screenplay For Film Or Television,Michigan State University,https://www.coursera.org/learn/write-a-feature-length-screenplay-for-film-or-television,write-a-feature-length-screenplay-for-film-or-television
+Marketing Gerencial,Universidad de Chile,https://www.coursera.org/learn/marketing-gerencial,marketing-gerencial
+Corporate Finance Essentials,IESE Business School,https://www.coursera.org/learn/corporate-finance-essentials,corporate-finance-essentials
+Information Security: Context and Introduction,"Royal Holloway, University of London",https://www.coursera.org/learn/information-security-data,information-security-data
+"Anatomy of the Chest, Abdomen, and Pelvis",Yale University,https://www.coursera.org/learn/trunk-anatomy,trunk-anatomy
+Introduction to CSS3,University of Michigan,https://www.coursera.org/learn/introcss,introcss
+Applied Data Science Capstone,IBM,https://www.coursera.org/learn/applied-data-science-capstone,applied-data-science-capstone
+Introduction aux Droits de l’Homme,University of Geneva,https://www.coursera.org/learn/droits-de-lhomme,droits-de-lhomme
+"Programming Languages, Part A",University of Washington,https://www.coursera.org/learn/programming-languages,programming-languages
+Big History: Connecting Knowledge,Macquarie University,https://www.coursera.org/learn/big-history,big-history
+Leadership in 21st Century Organizations,Copenhagen Business School,https://www.coursera.org/learn/leadership-21st-century,leadership-21st-century
+Software Processes and Agile Practices,University of Alberta,https://www.coursera.org/learn/software-processes-and-agile-practices,software-processes-and-agile-practices
+DevOps Culture and Mindset,"University of California, Davis",https://www.coursera.org/learn/devops-culture-and-mindset,devops-culture-and-mindset
+Introduction to Statistics & Data Analysis in Public Health,Imperial College London,https://www.coursera.org/learn/introduction-statistics-data-analysis-public-health,introduction-statistics-data-analysis-public-health
+Discrete Optimization,The University of Melbourne,https://www.coursera.org/learn/discrete-optimization,discrete-optimization
+Основы разработки на C++: белый пояс,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/c-plus-plus-white,c-plus-plus-white
+COVID-19 Contact Tracing For Nursing Professionals,University of Houston,https://www.coursera.org/learn/covid-19-contact-tracing-for-nursing-professionals,covid-19-contact-tracing-for-nursing-professionals
+High Stakes Leadership: Leading in Times of Crisis,University of Michigan,https://www.coursera.org/learn/high-stakes-leadership,high-stakes-leadership
+Essential Epidemiologic Tools for Public Health Practice,Johns Hopkins University,https://www.coursera.org/learn/epidemiology-tools,epidemiology-tools
+Epigenetic Control of Gene Expression,The University of Melbourne,https://www.coursera.org/learn/epigenetics,epigenetics
+"Recruiting, Hiring, and Onboarding Employees",University of Minnesota,https://www.coursera.org/learn/recruiting-hiring-onboarding-employees,recruiting-hiring-onboarding-employees
+AWS Fundamentals: Addressing Security Risk,Amazon Web Services,https://www.coursera.org/learn/aws-fundamentals-addressing-security-risk,aws-fundamentals-addressing-security-risk
+Ancient Philosophy: Plato & His Predecessors,University of Pennsylvania,https://www.coursera.org/learn/plato,plato
+Becoming a changemaker: Introduction to Social Innovation,University of Cape Town,https://www.coursera.org/learn/social-innovation,social-innovation
+Spanish for Successful Communication in Healthcare Settings,Rice University,https://www.coursera.org/learn/spanish-in-healthcare-settings,spanish-in-healthcare-settings
+Leading transformations: Manage change,Macquarie University,https://www.coursera.org/learn/change-management,change-management
+Introduction to Systems Engineering,UNSW Sydney (The University of New South Wales),https://www.coursera.org/learn/systems-engineering,systems-engineering
+Digital Marketing Analytics in Theory,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/marketing-analytics,marketing-analytics
+Data Visualization and Communication with Tableau,Duke University,https://www.coursera.org/learn/analytics-tableau,analytics-tableau
+Bayesian Statistics: Techniques and Models,"University of California, Santa Cruz",https://www.coursera.org/learn/mcmc-bayesian-statistics,mcmc-bayesian-statistics
+Human Rights for Open Societies,Utrecht University,https://www.coursera.org/learn/humanrights,humanrights
+Introduction to Computers and Office Productivity Software,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/introduction-to-computers-and-office-productivity-software,introduction-to-computers-and-office-productivity-software
+The Introduction to Quantum Computing,Saint Petersburg State University,https://www.coursera.org/learn/quantum-computing-algorithms,quantum-computing-algorithms
+Intercultural Management,ESCP Business School,https://www.coursera.org/learn/intercultural,intercultural
+Get Interactive: Practical Teaching with Technology,University of London,https://www.coursera.org/learn/getinmooc,getinmooc
+International Law In Action: Investigating and Prosecuting International Crimes,Universiteit Leiden,https://www.coursera.org/learn/international-law-in-action-2,international-law-in-action-2
+Global Financial Markets and Instruments,Rice University,https://www.coursera.org/learn/global-financial-markets-instruments,global-financial-markets-instruments
+Write Your First Novel,Michigan State University,https://www.coursera.org/learn/write-your-first-novel,write-your-first-novel
+Développement psychologique de l'enfant,University of Geneva,https://www.coursera.org/learn/enfant-developpement,enfant-developpement
+Songwriting: Writing the Lyrics,Berklee College of Music,https://www.coursera.org/learn/songwriting-lyrics,songwriting-lyrics
+"Applied Plotting, Charting & Data Representation in Python",University of Michigan,https://www.coursera.org/learn/python-plotting,python-plotting
+Systems Thinking In Public Health,Johns Hopkins University,https://www.coursera.org/learn/systems-thinking,systems-thinking
+Excel Skills for Business: Advanced,Macquarie University,https://www.coursera.org/learn/excel-advanced,excel-advanced
+Introduction to Neuroeconomics: How the Brain Makes Decisions,National Research University Higher School of Economics,https://www.coursera.org/learn/neuroeconomics,neuroeconomics
+Community Organizing for Social Justice,University of Michigan,https://www.coursera.org/learn/community-organizing,community-organizing
+Build a Modern Computer from First Principles: Nand to Tetris Part II (project-centered course),Hebrew University of Jerusalem,https://www.coursera.org/learn/nand2tetris2,nand2tetris2
+The Global Financial Crisis,Yale University,https://www.coursera.org/learn/global-financial-crisis,global-financial-crisis
+Agile и Scrum в работе над проектами и продуктами,E-Learning Development Fund,https://www.coursera.org/learn/upravleniya-proektami-agile-scrum,upravleniya-proektami-agile-scrum
+Renewable Energy and Green Building Entrepreneurship,Duke University,https://www.coursera.org/learn/renewable-energy-entrepreneurship,renewable-energy-entrepreneurship
+The Cycle: Management of Successful Arts and Cultural Organizations,"University of Maryland, College Park",https://www.coursera.org/learn/the-cycle,the-cycle
+Privacy Law and Data Protection,University of Pennsylvania,https://www.coursera.org/learn/privacy-law-data-protection,privacy-law-data-protection
+Building Modern Python Applications on AWS,Amazon Web Services,https://www.coursera.org/learn/building-modern-python-applications-on-aws,building-modern-python-applications-on-aws
+Digital Business Models,Lund University,https://www.coursera.org/learn/digital-business-models,digital-business-models
+"Everyday Excel, Part 2",University of Colorado Boulder,https://www.coursera.org/learn/everyday-excel-part-2,everyday-excel-part-2
+Reliable Google Cloud Infrastructure: Design and Process,Google Cloud,https://www.coursera.org/learn/cloud-infrastructure-design-process,cloud-infrastructure-design-process
+Introduction to Computer Programming,University of London,https://www.coursera.org/learn/introduction-to-computer-programming,introduction-to-computer-programming
+"Big Data Essentials: HDFS, MapReduce and Spark RDD",Yandex,https://www.coursera.org/learn/big-data-essentials,big-data-essentials
+Dermatology: Trip to skin,Novosibirsk State University ,https://www.coursera.org/learn/dermatology,dermatology
+Sustainable Tourism – promoting environmental public health,University of Copenhagen,https://www.coursera.org/learn/sustainable-tourism,sustainable-tourism
+Population Health During A Pandemic: Contact Tracing and Beyond,University of Houston,https://www.coursera.org/learn/contact-tracing-for-covid-19,contact-tracing-for-covid-19
+Social Impact Strategy: Tools for Entrepreneurs and Innovators,University of Pennsylvania,https://www.coursera.org/learn/social-impact,social-impact
+C for Everyone: Programming Fundamentals,"University of California, Santa Cruz",https://www.coursera.org/learn/c-for-everyone,c-for-everyone
+Introduction to Structured Query Language (SQL),University of Michigan,https://www.coursera.org/learn/intro-sql,intro-sql
+Social and Economic Networks:  Models and Analysis,Stanford University,https://www.coursera.org/learn/social-economic-networks,social-economic-networks
+The Truth About Cats and Dogs,The University of Edinburgh,https://www.coursera.org/learn/cats-and-dogs,cats-and-dogs
+Sports and Society,Duke University,https://www.coursera.org/learn/sports-society,sports-society
+Fundamentals of Scalable Data Science,IBM,https://www.coursera.org/learn/ds,ds
+Effective Compliance Programs,University of Pennsylvania,https://www.coursera.org/learn/effective-compliance-programs,effective-compliance-programs
+Transformation of the Global Food System,University of Copenhagen,https://www.coursera.org/learn/transformation-global-food-system,transformation-global-food-system
+Web Application Technologies and Django,University of Michigan,https://www.coursera.org/learn/django-database-web-apps,django-database-web-apps
+Curanderismo: Traditional Healing Using Plants,University of New Mexico,https://www.coursera.org/learn/curanderismo-plants,curanderismo-plants
+Applied Machine Learning in Python,University of Michigan,https://www.coursera.org/learn/python-machine-learning,python-machine-learning
+Troubleshooting and Debugging Techniques,Google,https://www.coursera.org/learn/troubleshooting-debugging-techniques,troubleshooting-debugging-techniques
+Introduction to C# Programming and Unity,University of Colorado System,https://www.coursera.org/learn/introduction-programming-unity,introduction-programming-unity
+"Corrección, estilo y  variaciones de la lengua española",Universitat Autònoma de Barcelona,https://www.coursera.org/learn/correccion-estilo-variaciones,correccion-estilo-variaciones
+Les Fondamentaux de la Négociation,ESSEC Business School,https://www.coursera.org/learn/fondamentaux-negociation,fondamentaux-negociation
+Introduction to Clinical Data,Stanford University,https://www.coursera.org/learn/introduction-clinical-data,introduction-clinical-data
+The Science of Success: What Researchers Know that You Should Know,University of Michigan,https://www.coursera.org/learn/success,success
+Finance for Non-Financial Managers,Emory University,https://www.coursera.org/learn/finance-for-non-financial-managers,finance-for-non-financial-managers
+Getting Started With Music Theory,Michigan State University,https://www.coursera.org/learn/music-theory,music-theory
+Digital Marketing Analytics in Practice,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/digital-analytics,digital-analytics
+The Horse Course: Introduction to Basic Care and Management,University of Florida,https://www.coursera.org/learn/horse-care,horse-care
+Verb Tenses and Passives,"University of California, Irvine",https://www.coursera.org/learn/verb-passives,verb-passives
+Gestión de organizaciones efectivas,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/gestion-organizaciones-efectivas,gestion-organizaciones-efectivas
+Introduction to Big Data,University of California San Diego,https://www.coursera.org/learn/big-data-introduction,big-data-introduction
+Rédaction de contrats,University of Geneva,https://www.coursera.org/learn/contrats,contrats
+Philosophy and the Sciences: Introduction to the Philosophy of Cognitive Sciences,The University of Edinburgh,https://www.coursera.org/learn/philosophy-cognitive-sciences,philosophy-cognitive-sciences
+Schizophrenia,Wesleyan University,https://www.coursera.org/learn/schizophrenia,schizophrenia
+Ecology: Ecosystem Dynamics and Conservation,Howard Hughes Medical Institute ,https://www.coursera.org/learn/ecology-conservation,ecology-conservation
+Introduction to Game Development,Michigan State University,https://www.coursera.org/learn/game-development,game-development
+Practical Time Series Analysis,The State University of New York,https://www.coursera.org/learn/practical-time-series-analysis,practical-time-series-analysis
+VLSI CAD Part I: Logic,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/vlsi-cad-logic,vlsi-cad-logic
+Getting Started with Google Kubernetes Engine,Google Cloud,https://www.coursera.org/learn/google-kubernetes-engine,google-kubernetes-engine
+Exploring and Preparing your Data with BigQuery,Google Cloud,https://www.coursera.org/learn/gcp-exploring-preparing-data-bigquery,gcp-exploring-preparing-data-bigquery
+Six Sigma and the Organization (Advanced),University System of Georgia,https://www.coursera.org/learn/six-sigma-organization-advanced,six-sigma-organization-advanced
+Social Work Practice: Advocating Social Justice and Change,University of Michigan,https://www.coursera.org/learn/social-work-practice-advocating-social-justice-and-change,social-work-practice-advocating-social-justice-and-change
+Introduction to Machine Learning,Duke University,https://www.coursera.org/learn/machine-learning-duke,machine-learning-duke
+Entrepreneurship 2: Launching your Start-Up,University of Pennsylvania,https://www.coursera.org/learn/wharton-launching-startup,wharton-launching-startup
+Nanotechnology: A Maker’s Course,Duke University,https://www.coursera.org/learn/nanotechnology,nanotechnology
+Creative Problem Solving,University of Minnesota,https://www.coursera.org/learn/creative-problem-solving,creative-problem-solving
+"Sleep: Neurobiology, Medicine, and Society",University of Michigan,https://www.coursera.org/learn/sleep,sleep
+COVID-19: What You Need to Know (CME Eligible),Osmosis,https://www.coursera.org/learn/covid-19-what-you-need-to-know,covid-19-what-you-need-to-know
+Classical Sociological Theory,University of Amsterdam,https://www.coursera.org/learn/classical-sociological-theory,classical-sociological-theory
+Electric Industry Operations and Markets,Duke University,https://www.coursera.org/learn/electricity,electricity
+Preparing for the Google Cloud Professional Cloud Architect Exam,Google Cloud,https://www.coursera.org/learn/preparing-cloud-professional-cloud-architect-exam,preparing-cloud-professional-cloud-architect-exam
+Effective Business Presentations with Powerpoint,PwC,https://www.coursera.org/learn/powerpoint-presentations,powerpoint-presentations
+More Introduction to Financial Accounting,University of Pennsylvania,https://www.coursera.org/learn/wharton-financial-accounting,wharton-financial-accounting
+Cryptocurrency and Blockchain: An Introduction to Digital Currencies,University of Pennsylvania,https://www.coursera.org/learn/wharton-cryptocurrency-blockchain-introduction-digital-currency,wharton-cryptocurrency-blockchain-introduction-digital-currency
+Dairy Production and Management,The Pennsylvania State University,https://www.coursera.org/learn/dairy-production,dairy-production
+Think Again I: How to Understand Arguments,Duke University,https://www.coursera.org/learn/understanding-arguments,understanding-arguments
+Developing Your Musicianship,Berklee College of Music,https://www.coursera.org/learn/develop-your-musicianship,develop-your-musicianship
+Introduction to Operations Management,University of Pennsylvania,https://www.coursera.org/learn/wharton-operations,wharton-operations
+The Oral Cavity: Portal to Health and Disease,University of Pennsylvania,https://www.coursera.org/learn/oralcavity,oralcavity
+Preparing for the Google Cloud Associate Cloud Engineer Exam,Google Cloud,https://www.coursera.org/learn/preparing-cloud-associate-cloud-engineer-exam,preparing-cloud-associate-cloud-engineer-exam
+Fundamental Neuroscience for Neuroimaging,Johns Hopkins University,https://www.coursera.org/learn/neuroscience-neuroimaging,neuroscience-neuroimaging
+European Business Law: Understanding the Fundamentals,Lund University,https://www.coursera.org/learn/european-law-fundamentals,european-law-fundamentals
+Teach English Now! Teaching Language Online,Arizona State University,https://www.coursera.org/learn/teachlanguageonline,teachlanguageonline
+Front-End Web Development with React,The Hong Kong University of Science and Technology,https://www.coursera.org/learn/front-end-react,front-end-react
+Music Business Foundations,Berklee College of Music,https://www.coursera.org/learn/music-business-foundations,music-business-foundations
+Introduction to Business Analytics: Communicating with Data,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/intro-business-analytics,intro-business-analytics
+Security & Safety Challenges in a Globalized World,Universiteit Leiden,https://www.coursera.org/learn/security-safety-globalized-world,security-safety-globalized-world
+Machine Learning Foundations: A Case Study Approach,University of Washington,https://www.coursera.org/learn/ml-foundations,ml-foundations
+Accounting Analytics,University of Pennsylvania,https://www.coursera.org/learn/accounting-analytics,accounting-analytics
+Strategic Business Management - Microeconomics,"University of California, Irvine",https://www.coursera.org/learn/strategic-business-management-microeconomics,strategic-business-management-microeconomics
+"Epidemics, Pandemics and Outbreaks",University of Pittsburgh,https://www.coursera.org/learn/epidemic-pandemic-outbreak,epidemic-pandemic-outbreak
+Roman Architecture,Yale University,https://www.coursera.org/learn/roman-architecture,roman-architecture
+Research Data Management and Sharing,The University of Edinburgh,https://www.coursera.org/learn/data-management,data-management
+Introduction to Genomic Technologies,Johns Hopkins University,https://www.coursera.org/learn/introduction-genomics,introduction-genomics
+Strategic Management,Copenhagen Business School,https://www.coursera.org/learn/strategic-management,strategic-management
+Cybersecurity Compliance Framework & System Administration,IBM,https://www.coursera.org/learn/cybersecurity-compliance-framework-system-administration,cybersecurity-compliance-framework-system-administration
+Legal Tech & Startups,IE Business School,https://www.coursera.org/learn/legal-tech-startups,legal-tech-startups
+Introduction to Chemistry:  Reactions and Ratios,Duke University,https://www.coursera.org/learn/intro-chemistry,intro-chemistry
+The Science of Stem Cells,American Museum of Natural History,https://www.coursera.org/learn/stem-cells,stem-cells
+The Business of Product Management I,Advancing Women in Product,https://www.coursera.org/learn/the-business-of-product-management-one,the-business-of-product-management-one
+Positive Psychology: Resilience Skills,University of Pennsylvania,https://www.coursera.org/learn/positive-psychology-resilience,positive-psychology-resilience
+AI for Medical Prognosis,DeepLearning.AI,https://www.coursera.org/learn/ai-for-medical-prognosis,ai-for-medical-prognosis
+Antibiotic Stewardship,Stanford University,https://www.coursera.org/learn/antibiotic-stewardship,antibiotic-stewardship
+UX / UI: Fundamentos para o design de interface,Universidade de São Paulo,https://www.coursera.org/learn/ux-ui-design-de-interface,ux-ui-design-de-interface
+EMT Foundations,University of Colorado System,https://www.coursera.org/learn/emt-foundations,emt-foundations
+Industrial IoT on Google Cloud Platform,Google Cloud,https://www.coursera.org/learn/iiot-google-cloud-platform,iiot-google-cloud-platform
+"Penetration Testing, Incident Response and Forensics",IBM,https://www.coursera.org/learn/ibm-penetration-testing-incident-response-forensics,ibm-penetration-testing-incident-response-forensics
+Database Management Essentials,University of Colorado System,https://www.coursera.org/learn/database-management,database-management
+Advertising and Society,Duke University,https://www.coursera.org/learn/role-of-advertising,role-of-advertising
+Everyday Chinese Medicine,The Chinese University of Hong Kong,https://www.coursera.org/learn/everyday-chinese-medicine,everyday-chinese-medicine
+Fundamentals of Machine Learning for Healthcare,Stanford University,https://www.coursera.org/learn/fundamental-machine-learning-healthcare,fundamental-machine-learning-healthcare
+New Approaches to Countering Terror: Countering Violent Extremism,"University of Maryland, College Park",https://www.coursera.org/learn/countering-terror-violent-extremism,countering-terror-violent-extremism
+Magic in the Middle Ages,Universitat de Barcelona,https://www.coursera.org/learn/magic-middle-ages,magic-middle-ages
+Challenging Forensic Science: How Science Should Speak to Court,University of Lausanne,https://www.coursera.org/learn/challenging-forensic-science,challenging-forensic-science
+Exploring Renewable Energy Schemes,University of Pennsylvania,https://www.coursera.org/learn/exploring-renewable-energy,exploring-renewable-energy
+Paleontology: Theropod Dinosaurs and the Origin of Birds,University of Alberta,https://www.coursera.org/learn/theropods-birds,theropods-birds
+Corporate Strategy,UCL School of Management,https://www.coursera.org/learn/corporatestrategy,corporatestrategy
+Getting Started with Essay Writing,"University of California, Irvine",https://www.coursera.org/learn/getting-started-with-essay-writing,getting-started-with-essay-writing
+Actualización en el manejo del paciente con diabetes mellitus tipo 2,Universidad Nacional Autónoma de México,https://www.coursera.org/learn/actualizacion-manejo-diabetes-tipo-2,actualizacion-manejo-diabetes-tipo-2
+Mastering Final Cut Pro,LearnQuest,https://www.coursera.org/learn/mastering-final-cut-pro,mastering-final-cut-pro
+Removing Barriers to Change,University of Pennsylvania,https://www.coursera.org/learn/removing-barriers-to-change,removing-barriers-to-change
+Tricky American English Pronunciation,"University of California, Irvine",https://www.coursera.org/learn/tricky-american-english-pronunciation,tricky-american-english-pronunciation
+Managing an Agile Team,University of Virginia,https://www.coursera.org/learn/uva-darden-agile-team-management,uva-darden-agile-team-management
+International Law in Action: A Guide to the International Courts and Tribunals in The Hague,Universiteit Leiden,https://www.coursera.org/learn/international-law-in-action,international-law-in-action
+"Competencias Laborales: Perfiles, Evaluación y Capacitación.",Universidad de Chile,https://www.coursera.org/learn/competencias-lab,competencias-lab
+Getting started with TensorFlow 2,Imperial College London,https://www.coursera.org/learn/getting-started-with-tensor-flow2,getting-started-with-tensor-flow2
+Geopolitics of Europe,Sciences Po,https://www.coursera.org/learn/geopolitics-europe,geopolitics-europe
+Osteoarchaeology: The Truth in Our Bones,Universiteit Leiden,https://www.coursera.org/learn/truthinourbones-osteoarchaeology-archaeology,truthinourbones-osteoarchaeology-archaeology
+Mathematics for Machine Learning: PCA,Imperial College London,https://www.coursera.org/learn/pca-machine-learning,pca-machine-learning
+Object Oriented Programming in Java,University of California San Diego,https://www.coursera.org/learn/object-oriented-java,object-oriented-java
+Embedded Software and Hardware Architecture,University of Colorado Boulder,https://www.coursera.org/learn/embedded-software-hardware,embedded-software-hardware
+Memoir and Personal Essay: Managing Your Relationship with the Reader,Wesleyan University,https://www.coursera.org/learn/memoir-reader-relationship,memoir-reader-relationship
+Математика и Python для анализа данных,Moscow Institute of Physics and Technology,https://www.coursera.org/learn/mathematics-and-python,mathematics-and-python
+Hacia una práctica constructivista en el aula,Pontificia Universidad Católica de Chile,https://www.coursera.org/learn/aulaconstructivista,aulaconstructivista
+Hypothesis-Driven Development,University of Virginia,https://www.coursera.org/learn/uva-darden-agile-testing,uva-darden-agile-testing
+Accounting Data Analytics with Python,University of Illinois at Urbana-Champaign,https://www.coursera.org/learn/accounting-data-analytics-python,accounting-data-analytics-python
+Introduction to Molecular Spectroscopy,University of Manchester   ,https://www.coursera.org/learn/spectroscopy,spectroscopy
+Managing as a Coach,"University of California, Davis",https://www.coursera.org/learn/managing-as-a-coach,managing-as-a-coach
+The fundamentals of hotel distribution,ESSEC Business School,https://www.coursera.org/learn/hotel-distribution,hotel-distribution
+A Crash Course in Data Science,Johns Hopkins University,https://www.coursera.org/learn/data-science-course,data-science-course

course_feedback_nlp/Untitled.ipynb ADDED Viewed

	@@ -0,0 +1,418 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "2c0bc557-3218-4715-900e-491cc5560b6a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "a2d59d9a-5855-4a21-9988-3ea5dd2bb43c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reviews_df = pd.read_csv(\"Coursera_reviews.csv\")\n",
+    "courses_df = pd.read_csv(\"Coursera_courses.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "7af404ff-07f8-489c-b350-263cb33bb277",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>reviews</th>\n",
+       "      <th>reviewers</th>\n",
+       "      <th>date_reviews</th>\n",
+       "      <th>rating</th>\n",
+       "      <th>course_id</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Pretty dry, but I was able to pass with just t...</td>\n",
+       "      <td>By Robert S</td>\n",
+       "      <td>Feb 12, 2020</td>\n",
+       "      <td>4</td>\n",
+       "      <td>google-cbrs-cpi-training</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>would be a better experience if the video and ...</td>\n",
+       "      <td>By Gabriel E R</td>\n",
+       "      <td>Sep 28, 2020</td>\n",
+       "      <td>4</td>\n",
+       "      <td>google-cbrs-cpi-training</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Information was perfect! The program itself wa...</td>\n",
+       "      <td>By Jacob D</td>\n",
+       "      <td>Apr 08, 2020</td>\n",
+       "      <td>4</td>\n",
+       "      <td>google-cbrs-cpi-training</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A few grammatical mistakes on test made me do ...</td>\n",
+       "      <td>By Dale B</td>\n",
+       "      <td>Feb 24, 2020</td>\n",
+       "      <td>4</td>\n",
+       "      <td>google-cbrs-cpi-training</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Excellent course and the training provided was...</td>\n",
+       "      <td>By Sean G</td>\n",
+       "      <td>Jun 18, 2020</td>\n",
+       "      <td>4</td>\n",
+       "      <td>google-cbrs-cpi-training</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                             reviews       reviewers  \\\n",
+       "0  Pretty dry, but I was able to pass with just t...     By Robert S   \n",
+       "1  would be a better experience if the video and ...  By Gabriel E R   \n",
+       "2  Information was perfect! The program itself wa...      By Jacob D   \n",
+       "3  A few grammatical mistakes on test made me do ...       By Dale B   \n",
+       "4  Excellent course and the training provided was...       By Sean G   \n",
+       "\n",
+       "   date_reviews  rating                 course_id  \n",
+       "0  Feb 12, 2020       4  google-cbrs-cpi-training  \n",
+       "1  Sep 28, 2020       4  google-cbrs-cpi-training  \n",
+       "2  Apr 08, 2020       4  google-cbrs-cpi-training  \n",
+       "3  Feb 24, 2020       4  google-cbrs-cpi-training  \n",
+       "4  Jun 18, 2020       4  google-cbrs-cpi-training  "
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "reviews_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "8e1bef72-cba2-4431-b111-03bc0c872ee0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>institution</th>\n",
+       "      <th>course_url</th>\n",
+       "      <th>course_id</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Machine Learning</td>\n",
+       "      <td>Stanford University</td>\n",
+       "      <td>https://www.coursera.org/learn/machine-learning</td>\n",
+       "      <td>machine-learning</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Indigenous Canada</td>\n",
+       "      <td>University of Alberta</td>\n",
+       "      <td>https://www.coursera.org/learn/indigenous-canada</td>\n",
+       "      <td>indigenous-canada</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>The Science of Well-Being</td>\n",
+       "      <td>Yale University</td>\n",
+       "      <td>https://www.coursera.org/learn/the-science-of-...</td>\n",
+       "      <td>the-science-of-well-being</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Technical Support Fundamentals</td>\n",
+       "      <td>Google</td>\n",
+       "      <td>https://www.coursera.org/learn/technical-suppo...</td>\n",
+       "      <td>technical-support-fundamentals</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Become a CBRS Certified Professional Installer...</td>\n",
+       "      <td>Google - Spectrum Sharing</td>\n",
+       "      <td>https://www.coursera.org/learn/google-cbrs-cpi...</td>\n",
+       "      <td>google-cbrs-cpi-training</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                name  \\\n",
+       "0                                   Machine Learning   \n",
+       "1                                  Indigenous Canada   \n",
+       "2                          The Science of Well-Being   \n",
+       "3                     Technical Support Fundamentals   \n",
+       "4  Become a CBRS Certified Professional Installer...   \n",
+       "\n",
+       "                 institution  \\\n",
+       "0        Stanford University   \n",
+       "1      University of Alberta   \n",
+       "2            Yale University   \n",
+       "3                     Google   \n",
+       "4  Google - Spectrum Sharing   \n",
+       "\n",
+       "                                          course_url  \\\n",
+       "0    https://www.coursera.org/learn/machine-learning   \n",
+       "1   https://www.coursera.org/learn/indigenous-canada   \n",
+       "2  https://www.coursera.org/learn/the-science-of-...   \n",
+       "3  https://www.coursera.org/learn/technical-suppo...   \n",
+       "4  https://www.coursera.org/learn/google-cbrs-cpi...   \n",
+       "\n",
+       "                        course_id  \n",
+       "0                machine-learning  \n",
+       "1               indigenous-canada  \n",
+       "2       the-science-of-well-being  \n",
+       "3  technical-support-fundamentals  \n",
+       "4        google-cbrs-cpi-training  "
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "courses_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0ba0e446-f8ac-4949-868f-2d70e282f25e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0984692eeaa447a6a9dd70435c72e55d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5bf297ecaf37442eb4b01f6a8ac2b69f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6445393b608c4822bd90bdf2f1692a0e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "vocab.txt: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "09f22b8c6976416d96c5af9bef5e25e4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json: 0.00B [00:00, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6049ddbbb2e94125b20c61d3d9ab5cb4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3a7f44f7809e41fd86309b76ee57f0dc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "ename": "OSError",
+     "evalue": "distilbert/distilbert-base-uncased does not appear to have a file named pytorch_model.bin or model.safetensors.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mOSError\u001b[39m                                   Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m      2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mtransformers\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer, AutoModelForMaskedLM\n\u001b[32m      4\u001b[39m tokenizer = AutoTokenizer.from_pretrained(\u001b[33m\"\u001b[39m\u001b[33mdistilbert/distilbert-base-uncased\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m model = \u001b[43mAutoModelForMaskedLM\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mdistilbert/distilbert-base-uncased\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:372\u001b[39m, in \u001b[36m_BaseAutoModelClass.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[39m\n\u001b[32m    370\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m model_class.config_class == config.sub_configs.get(\u001b[33m\"\u001b[39m\u001b[33mtext_config\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m    371\u001b[39m         config = config.get_text_config()\n\u001b[32m--> \u001b[39m\u001b[32m372\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    373\u001b[39m \u001b[43m        \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m    374\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    375\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[32m    376\u001b[39m     \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig.\u001b[34m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m for this kind of AutoModel: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m.\u001b[34m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m    377\u001b[39m     \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m.join(c.\u001b[34m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m._model_mapping)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    378\u001b[39m )\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\modeling_utils.py:4038\u001b[39m, in \u001b[36mPreTrainedModel.from_pretrained\u001b[39m\u001b[34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[39m\n\u001b[32m   4033\u001b[39m     logger.warning_once(\n\u001b[32m   4034\u001b[39m         \u001b[33m\"\u001b[39m\u001b[33mA kernel_config was provided but use_kernels is False; setting use_kernels=True automatically. To suppress this warning, explicitly set use_kernels to True.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m   4035\u001b[39m     )\n\u001b[32m   4036\u001b[39m     use_kernels = \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m4038\u001b[39m checkpoint_files, sharded_metadata = \u001b[43m_get_resolved_checkpoint_files\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   4039\u001b[39m \u001b[43m    \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m=\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   4040\u001b[39m \u001b[43m    \u001b[49m\u001b[43mvariant\u001b[49m\u001b[43m=\u001b[49m\u001b[43mvariant\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   4041\u001b[39m \u001b[43m    \u001b[49m\u001b[43mgguf_file\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgguf_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   4042\u001b[39m \u001b[43m    \u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[43m=\u001b[49m\u001b[43muse_safetensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   4043\u001b[39m \u001b[43m    \u001b[49m\u001b[43mdownload_kwargs\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdownload_kwargs_with_commit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   4044\u001b[39m \u001b[43m    
\u001b[49m\u001b[43muser_agent\u001b[49m\u001b[43m=\u001b[49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   4045\u001b[39m \u001b[43m    \u001b[49m\u001b[43mis_remote_code\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_auto_class\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m   4046\u001b[39m \u001b[43m    \u001b[49m\u001b[43mtransformers_explicit_filename\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtransformers_weights\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   4047\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   4049\u001b[39m is_quantized = hf_quantizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m   4051\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m gguf_file:\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~\\Documents\\students\\course_feedback_nlp\\pytorch\\Lib\\site-packages\\transformers\\modeling_utils.py:710\u001b[39m, in \u001b[36m_get_resolved_checkpoint_files\u001b[39m\u001b[34m(pretrained_model_name_or_path, variant, gguf_file, use_safetensors, user_agent, is_remote_code, transformers_explicit_filename, download_kwargs)\u001b[39m\n\u001b[32m    704\u001b[39m             \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\n\u001b[32m    705\u001b[39m                 \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not appear to have a file named\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    706\u001b[39m                 \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m but there is a file without the variant\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    707\u001b[39m                 \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvariant\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. 
Use `variant=None` to load this model from those weights.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    708\u001b[39m             )\n\u001b[32m    709\u001b[39m         \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m710\u001b[39m             \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\n\u001b[32m    711\u001b[39m                 \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m does not appear to have a file named\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    712\u001b[39m                 \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m or \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_add_variant(SAFE_WEIGHTS_NAME,\u001b[38;5;250m \u001b[39mvariant)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    713\u001b[39m             )\n\u001b[32m    715\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m:\n\u001b[32m    716\u001b[39m     \u001b[38;5;66;03m# Raise any environment error raise by `cached_file`. It will have a helpful error message adapted\u001b[39;00m\n\u001b[32m    717\u001b[39m     \u001b[38;5;66;03m# to the original exception.\u001b[39;00m\n\u001b[32m    718\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m\n",
+      "\u001b[31mOSError\u001b[39m: distilbert/distilbert-base-uncased does not appear to have a file named pytorch_model.bin or model.safetensors."
+     ]
+    }
+   ],
+   "source": [
+    "# Load model directly\n",
+    "from transformers import AutoTokenizer, AutoModelForMaskedLM\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")\n",
+    "model = AutoModelForMaskedLM.from_pretrained(\"distilbert/distilbert-base-uncased\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "266c5278-0eb4-4daf-b18f-98d9d426ed70",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "f687230b-a74c-4b63-92f9-f6bc378feecc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\u001b[31mType:\u001b[39m            _LazyModule\n",
+       "\u001b[31mString form:\u001b[39m     <module 'transformers' from 'C:\\\\Users\\\\PC\\\\Documents\\\\students\\\\course_feedback_nlp\\\\pytorch\\\\Lib\\\\site-packages\\\\transformers\\\\__init__.py'>\n",
+       "\u001b[31mFile:\u001b[39m            c:\\users\\pc\\documents\\students\\course_feedback_nlp\\pytorch\\lib\\site-packages\\transformers\\__init__.py\n",
+       "\u001b[31mDocstring:\u001b[39m       <no docstring>\n",
+       "\u001b[31mClass docstring:\u001b[39m Module class that surfaces all objects but only performs associated imports when the objects are requested."
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "transformers?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf2d9082-1d36-4fde-ad24-8e7599c65acc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pytorch",
+   "language": "python",
+   "name": "pytorch"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

course_feedback_nlp/evaluate.py ADDED Viewed

	@@ -0,0 +1,548 @@

+"""
+Student Feedback Sentiment Model - Evaluation Script
+====================================================
+Run this after training to complete:
+- Test evaluation
+- Generate plots
+- Save results
+"""
+import torch
+import torch.nn.functional as F
+from torch.utils.data import DataLoader, TensorDataset
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, confusion_matrix
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
+from tqdm.auto import tqdm
+import matplotlib.pyplot as plt
+import seaborn as sns
+import os
+import json
+import gc
+import warnings
+warnings.filterwarnings('ignore')
+# ============================================================
+# CONFIGURATION (must match training!)
+# ============================================================
+# Settings read by main() below for data loading, tokenization and inference.
+CONFIG = {
+    'data_path': 'Coursera_reviews.csv',  # CSV loaded with pd.read_csv; its 'reviews' and 'rating' columns are used
+    'base_model': './distilbert-base-uncased',  # base checkpoint path; its config is loaded with local_files_only=True
+    'output_dir': 'teacher_sentiment_model',  # fine-tuned model/tokenizer are loaded from here; corrected config.json is written here
+    'num_classes': 3,  # written to the model config as num_labels
+    'class_names': ['Negative', 'Neutral', 'Positive'],  # index order defines id2label / label2id
+    # Key is (rating - 1), i.e. 0-4; value is the index into class_names.
+    'class_mapping': {
+        0: 0,  # 1-star → Negative
+        1: 0,  # 2-star → Negative
+        2: 1,  # 3-star → Neutral
+        3: 2,  # 4-star → Positive
+        4: 2,  # 5-star → Positive
+    },
+    'max_length': 96,  # every review is truncated / padded to this many tokens
+    'batch_size': 128,  # DataLoader batch size for the test set
+    'test_size': 0.1,  # fraction held out by train_test_split
+    'seed': 42,  # passed to set_seed() and used as train_test_split random_state
+    'num_workers': 4,  # DataLoader worker processes
+    'use_amp': True,  # run the forward pass under torch.amp.autocast('cuda')
+}
+def set_seed(seed):
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    np.random.seed(seed)
+def tokenize_batch(texts, tokenizer, max_length, desc="Tokenizing"):
+    all_input_ids = []
+    all_attention_masks = []
+    batch_size = 10000
+    for i in tqdm(range(0, len(texts), batch_size), desc=desc):
+        batch_texts = texts[i:i+batch_size].tolist()
+        encodings = tokenizer(
+            batch_texts,
+            truncation=True,
+            padding='max_length',
+            max_length=max_length,
+            return_tensors='pt'
+        )
+        all_input_ids.append(encodings['input_ids'])
+        all_attention_masks.append(encodings['attention_mask'])
+    return torch.cat(all_input_ids, dim=0), torch.cat(all_attention_masks, dim=0)
+def main():
+    set_seed(CONFIG['seed'])
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print("=" * 70)
+    print("STUDENT FEEDBACK SENTIMENT MODEL - EVALUATION")
+    print("=" * 70)
+    print(f"Device: {device}")
+    if torch.cuda.is_available():
+        print(f"GPU: {torch.cuda.get_device_name(0)}")
+    print()
+    os.makedirs('plots', exist_ok=True)
+    # ============================================================
+    # FIX CONFIG.JSON (the bug from training)
+    # ============================================================
+    print("FIXING MODEL CONFIG")
+    print("-" * 70)
+    # Load original config from base model
+    original_config = AutoConfig.from_pretrained(
+        CONFIG['base_model'],
+        local_files_only=True
+    )
+    # Update for our task
+    original_config.num_labels = CONFIG['num_classes']
+    original_config.id2label = {i: name for i, name in enumerate(CONFIG['class_names'])}
+    original_config.label2id = {name: i for i, name in enumerate(CONFIG['class_names'])}
+    # Save corrected config
+    original_config.save_pretrained(CONFIG['output_dir'])
+    print(f"  ✓ Fixed config.json in {CONFIG['output_dir']}/")
+    # Save our training config separately
+    training_config = {
+        'num_classes': CONFIG['num_classes'],
+        'class_names': CONFIG['class_names'],
+        'class_mapping': CONFIG['class_mapping'],
+        'max_length': CONFIG['max_length'],
+    }
+    with open(os.path.join(CONFIG['output_dir'], 'training_config.json'), 'w') as f:
+        json.dump(training_config, f, indent=2)
+    print(f"  ✓ Saved training_config.json")
+    print()
+    # ============================================================
+    # LOAD DATA (only need test set)
+    # ============================================================
+    print("LOADING DATA")
+    print("-" * 70)
+    df = pd.read_csv(CONFIG['data_path'])
+    print(f"Raw data: {len(df):,} samples")
+    # Clean
+    df = df.dropna(subset=['reviews', 'rating'])
+    df = df[df['reviews'].str.strip() != '']
+    df['rating'] = df['rating'].astype(int)
+    df = df[df['rating'].between(1, 5)]
+    # Map to 3 classes
+    df['label_5class'] = df['rating'] - 1
+    df['label'] = df['label_5class'].map(CONFIG['class_mapping'])
+    print(f"Cleaned data: {len(df):,} samples")
+    # Get test split (same as training!)
+    _, X_test, _, y_test = train_test_split(
+        df['reviews'].values, df['label'].values,
+        test_size=CONFIG['test_size'],
+        random_state=CONFIG['seed'],
+        stratify=df['label'].values
+    )
+    print(f"Test samples: {len(X_test):,}")
+    print()
+    del df
+    gc.collect()
+    # ============================================================
+    # TOKENIZE TEST DATA
+    # ============================================================
+    print("TOKENIZATION")
+    print("-" * 70)
+    tokenizer = AutoTokenizer.from_pretrained(CONFIG['output_dir'], local_files_only=True)
+    test_ids, test_masks = tokenize_batch(X_test, tokenizer, CONFIG['max_length'], "Test")
+    test_labels = torch.tensor(y_test, dtype=torch.long)
+    test_dataset = TensorDataset(test_ids, test_masks, test_labels)
+    test_loader = DataLoader(
+        test_dataset,
+        batch_size=CONFIG['batch_size'],
+        shuffle=False,
+        num_workers=CONFIG['num_workers'],
+        pin_memory=True
+    )
+    print(f"Test batches: {len(test_loader):,}")
+    print()
+    del X_test, y_test
+    gc.collect()
+    # ============================================================
+    # LOAD MODEL
+    # ============================================================
+    print("LOADING MODEL")
+    print("-" * 70)
+    model = AutoModelForSequenceClassification.from_pretrained(
+        CONFIG['output_dir'],
+        local_files_only=True
+    )
+    model = model.to(device)
+    model.eval()
+    print(f"  ✓ Model loaded from {CONFIG['output_dir']}/")
+    print(f"  ✓ Num labels: {model.config.num_labels}")
+    print()
+    # ============================================================
+    # RUN TEST EVALUATION
+    # ============================================================
+    print("=" * 70)
+    print("FINAL TEST EVALUATION")
+    print("=" * 70)
+    all_preds = []
+    all_labels = []
+    all_probs = []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in tqdm(test_loader, desc="Testing", ncols=100):
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            if CONFIG['use_amp']:
+                with torch.amp.autocast('cuda'):
+                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+            else:
+                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+            probs = F.softmax(outputs.logits, dim=-1)
+            _, preds = outputs.logits.max(1)
+            all_preds.extend(preds.cpu().numpy())
+            all_labels.extend(labels.numpy())
+            all_probs.extend(probs.cpu().numpy())
+    all_preds = np.array(all_preds)
+    all_labels = np.array(all_labels)
+    all_probs = np.array(all_probs)
+    test_acc = 100 * (all_preds == all_labels).mean()
+    print()
+    print(f"Test Accuracy: {test_acc:.2f}%")
+    print()
+    # ============================================================
+    # CLASSIFICATION REPORT
+    # ============================================================
+    print("CLASSIFICATION REPORT")
+    print("-" * 70)
+    report = classification_report(
+        all_labels, all_preds,
+        target_names=CONFIG['class_names'],
+        digits=3,
+        output_dict=True
+    )
+    print(classification_report(
+        all_labels, all_preds,
+        target_names=CONFIG['class_names'],
+        digits=3
+    ))
+    # ============================================================
+    # TEACHER-FOCUSED METRICS
+    # ============================================================
+    print()
+    print("=" * 70)
+    print("📊 TEACHER-FOCUSED METRICS")
+    print("=" * 70)
+    print()
+    # Negative class recall
+    negative_recall = report['Negative']['recall'] * 100
+    negative_precision = report['Negative']['precision'] * 100
+    negative_f1 = report['Negative']['f1-score'] * 100
+    print(f"  🔴 NEGATIVE FEEDBACK DETECTION (Struggling Students):")
+    print(f"     Recall:    {negative_recall:.1f}% ← Catches {negative_recall:.0f}% of struggling students")
+    print(f"     Precision: {negative_precision:.1f}% ← {negative_precision:.0f}% of flags are real issues")
+    print(f"     F1-Score:  {negative_f1:.1f}%")
+    print()
+    # False negative analysis
+    false_negatives = ((all_labels == 0) & (all_preds != 0)).sum()
+    total_negatives = (all_labels == 0).sum()
+    missed_pct = 100 * false_negatives / total_negatives if total_negatives > 0 else 0
+    print(f"  ⚠️  MISSED STRUGGLING STUDENTS:")
+    print(f"     {false_negatives:,} of {total_negatives:,} negative cases missed ({missed_pct:.1f}%)")
+    print()
+    # Where did false negatives go?
+    fn_mask = (all_labels == 0) & (all_preds != 0)
+    if fn_mask.sum() > 0:
+        fn_preds = all_preds[fn_mask]
+        fn_to_neutral = (fn_preds == 1).sum()
+        fn_to_positive = (fn_preds == 2).sum()
+        print(f"     Misclassified as Neutral:  {fn_to_neutral:,}")
+        print(f"     Misclassified as Positive: {fn_to_positive:,}")
+        print()
+    # Confidence analysis
+    pred_confidence = all_probs.max(axis=1)
+    low_confidence = (pred_confidence < 0.7).sum()
+    low_conf_pct = 100 * low_confidence / len(pred_confidence)
+    print(f"  🤔 UNCERTAIN PREDICTIONS (confidence < 70%):")
+    print(f"     {low_confidence:,} of {len(pred_confidence):,} predictions ({low_conf_pct:.1f}%)")
+    print(f"     → These should be flagged for manual review")
+    print()
+    # Confidence by class
+    print(f"  📈 AVERAGE CONFIDENCE BY PREDICTION:")
+    for i, name in enumerate(CONFIG['class_names']):
+        mask = all_preds == i
+        if mask.sum() > 0:
+            avg_conf = pred_confidence[mask].mean() * 100
+            emoji = ['🔴', '🟡', '🟢'][i]
+            print(f"     {emoji} {name}: {avg_conf:.1f}%")
+    print()
+    # ============================================================
+    # CONFUSION MATRIX PLOT
+    # ============================================================
+    print("GENERATING PLOTS")
+    print("-" * 70)
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    cm = confusion_matrix(all_labels, all_preds)
+    # Counts
+    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
+                xticklabels=CONFIG['class_names'],
+                yticklabels=CONFIG['class_names'], ax=axes[0],
+                annot_kws={'size': 14})
+    axes[0].set_xlabel('Predicted', fontsize=12)
+    axes[0].set_ylabel('Actual', fontsize=12)
+    axes[0].set_title('Confusion Matrix (Counts)', fontsize=14)
+    # Normalized (Recall)
+    cm_norm = cm.astype(float) / cm.sum(axis=1, keepdims=True)
+    sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
+                xticklabels=CONFIG['class_names'],
+                yticklabels=CONFIG['class_names'], ax=axes[1],
+                annot_kws={'size': 14})
+    axes[1].set_xlabel('Predicted', fontsize=12)
+    axes[1].set_ylabel('Actual', fontsize=12)
+    axes[1].set_title('Confusion Matrix (Recall per Class)', fontsize=14)
+    plt.tight_layout()
+    plt.savefig('plots/confusion_matrix_3class.png', dpi=150, bbox_inches='tight')
+    print("  ✓ Saved: plots/confusion_matrix_3class.png")
+    # ============================================================
+    # PER-CLASS METRICS PLOT
+    # ============================================================
+    fig, ax = plt.subplots(figsize=(12, 6))
+    x = np.arange(3)
+    width = 0.25
+    recalls = [report[c]['recall'] * 100 for c in CONFIG['class_names']]
+    precisions = [report[c]['precision'] * 100 for c in CONFIG['class_names']]
+    f1s = [report[c]['f1-score'] * 100 for c in CONFIG['class_names']]
+    bars1 = ax.bar(x - width, recalls, width, label='Recall', color='#e74c3c', edgecolor='black')
+    bars2 = ax.bar(x, precisions, width, label='Precision', color='#3498db', edgecolor='black')
+    bars3 = ax.bar(x + width, f1s, width, label='F1-Score', color='#2ecc71', edgecolor='black')
+    ax.set_ylabel('Score (%)', fontsize=12)
+    ax.set_title('Per-Class Metrics (Teacher Sentiment Model)', fontsize=14)
+    ax.set_xticks(x)
+    ax.set_xticklabels([
+        '🔴 Negative\n(Needs Attention)',
+        '🟡 Neutral\n(Mixed/Unclear)',
+        '🟢 Positive\n(Satisfied)'
+    ], fontsize=11)
+    ax.legend(fontsize=11)
+    ax.set_ylim(0, 105)
+    ax.axhline(y=90, color='green', linestyle='--', alpha=0.5, label='90% target')
+    ax.grid(True, alpha=0.3, axis='y')
+    # Add value labels
+    for bars in [bars1, bars2, bars3]:
+        for bar in bars:
+            height = bar.get_height()
+            ax.annotate(f'{height:.1f}%',
+                        xy=(bar.get_x() + bar.get_width() / 2, height),
+                        xytext=(0, 3),
+                        textcoords="offset points",
+                        ha='center', va='bottom', fontsize=10, fontweight='bold')
+    plt.tight_layout()
+    plt.savefig('plots/per_class_metrics_3class.png', dpi=150, bbox_inches='tight')
+    print("  ✓ Saved: plots/per_class_metrics_3class.png")
+    # ============================================================
+    # CONFIDENCE DISTRIBUTION PLOT
+    # ============================================================
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    # Overall confidence distribution
+    axes[0].hist(pred_confidence, bins=50, color='steelblue', edgecolor='black', alpha=0.7)
+    axes[0].axvline(x=0.7, color='red', linestyle='--', linewidth=2, label='70% threshold')
+    axes[0].set_xlabel('Confidence', fontsize=12)
+    axes[0].set_ylabel('Count', fontsize=12)
+    axes[0].set_title('Prediction Confidence Distribution', fontsize=14)
+    axes[0].legend()
+    axes[0].grid(True, alpha=0.3)
+    # Confidence by class
+    colors = ['#e74c3c', '#f39c12', '#27ae60']
+    for i, (name, color) in enumerate(zip(CONFIG['class_names'], colors)):
+        mask = all_preds == i
+        if mask.sum() > 0:
+            axes[1].hist(pred_confidence[mask], bins=30, alpha=0.5, label=name, color=color)
+    axes[1].axvline(x=0.7, color='red', linestyle='--', linewidth=2, label='70% threshold')
+    axes[1].set_xlabel('Confidence', fontsize=12)
+    axes[1].set_ylabel('Count', fontsize=12)
+    axes[1].set_title('Confidence by Predicted Class', fontsize=14)
+    axes[1].legend()
+    axes[1].grid(True, alpha=0.3)
+    plt.tight_layout()
+    plt.savefig('plots/confidence_distribution.png', dpi=150, bbox_inches='tight')
+    print("  ✓ Saved: plots/confidence_distribution.png")
+    # ============================================================
+    # ERROR ANALYSIS PLOT
+    # ============================================================
+    fig, ax = plt.subplots(figsize=(10, 6))
+    # Calculate error rates
+    error_rates = []
+    for i, name in enumerate(CONFIG['class_names']):
+        mask = all_labels == i
+        errors = (all_preds[mask] != all_labels[mask]).sum()
+        total = mask.sum()
+        error_rate = 100 * errors / total if total > 0 else 0
+        error_rates.append(error_rate)
+    colors = ['#e74c3c', '#f39c12', '#27ae60']
+    bars = ax.bar(CONFIG['class_names'], error_rates, color=colors, edgecolor='black', linewidth=1.5)
+    ax.set_ylabel('Error Rate (%)', fontsize=12)
+    ax.set_title('Error Rate by True Class', fontsize=14)
+    ax.set_ylim(0, max(error_rates) * 1.2 if max(error_rates) > 0 else 10)
+    ax.grid(True, alpha=0.3, axis='y')
+    for bar, rate in zip(bars, error_rates):
+        ax.annotate(f'{rate:.1f}%',
+                    xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
+                    xytext=(0, 3),
+                    textcoords="offset points",
+                    ha='center', va='bottom', fontsize=12, fontweight='bold')
+    plt.tight_layout()
+    plt.savefig('plots/error_analysis.png', dpi=150, bbox_inches='tight')
+    print("  ✓ Saved: plots/error_analysis.png")
+    # ============================================================
+    # SAVE RESULTS
+    # ============================================================
+    print()
+    print("SAVING RESULTS")
+    print("-" * 70)
+    results = {
+        'test_accuracy': float(test_acc),
+        'negative_recall': float(negative_recall),
+        'negative_precision': float(negative_precision),
+        'negative_f1': float(negative_f1),
+        'neutral_recall': float(report['Neutral']['recall'] * 100),
+        'positive_recall': float(report['Positive']['recall'] * 100),
+        'missed_struggling_students': int(false_negatives),
+        'total_negative_cases': int(total_negatives),
+        'missed_percentage': float(missed_pct),
+        'low_confidence_predictions': int(low_confidence),
+        'low_confidence_percentage': float(low_conf_pct),
+        'macro_f1': float(report['macro avg']['f1-score'] * 100),
+        'weighted_f1': float(report['weighted avg']['f1-score'] * 100),
+    }
+    # Save as JSON
+    with open(os.path.join(CONFIG['output_dir'], 'results.json'), 'w') as f:
+        json.dump(results, f, indent=2)
+    print(f"  ✓ Saved: {CONFIG['output_dir']}/results.json")
+    # Save full results as PyTorch
+    full_results = {
+        **results,
+        'config': CONFIG,
+        'classification_report': report,
+        'confusion_matrix': cm.tolist(),
+        'all_predictions': all_preds.tolist(),
+        'all_labels': all_labels.tolist(),
+    }
+    torch.save(full_results, os.path.join(CONFIG['output_dir'], 'results.pt'))
+    print(f"  ✓ Saved: {CONFIG['output_dir']}/results.pt")
+    # ============================================================
+    # FINAL SUMMARY
+    # ============================================================
+    print()
+    print("=" * 70)
+    print("🎉 EVALUATION COMPLETE!")
+    print("=" * 70)
+    print()
+    print("  RESULTS SUMMARY:")
+    print(f"    Test Accuracy:      {test_acc:.2f}%")
+    print(f"    Macro F1-Score:     {report['macro avg']['f1-score']*100:.2f}%")
+    print(f"    Weighted F1-Score:  {report['weighted avg']['f1-score']*100:.2f}%")
+    print()
+    print("  PER-CLASS RECALL (most important for teachers):")
+    print(f"    🔴 Negative:  {negative_recall:.1f}% ← Catches {100-missed_pct:.0f}% of struggling students")
+    print(f"    🟡 Neutral:   {report['Neutral']['recall']*100:.1f}%")
+    print(f"    🟢 Positive:  {report['Positive']['recall']*100:.1f}%")
+    print()
+    print("  KEY INSIGHTS:")
+    print(f"    • {false_negatives:,} struggling students would be missed ({missed_pct:.1f}%)")
+    print(f"    • {low_confidence:,} predictions need manual review ({low_conf_pct:.1f}%)")
+    print()
+    print("  FILES SAVED:")
+    print(f"    • {CONFIG['output_dir']}/results.json")
+    print(f"    • {CONFIG['output_dir']}/results.pt")
+    print(f"    • plots/confusion_matrix_3class.png")
+    print(f"    • plots/per_class_metrics_3class.png")
+    print(f"    • plots/confidence_distribution.png")
+    print(f"    • plots/error_analysis.png")
+    print()
+    print("=" * 70)
+# Run the evaluation only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()

course_feedback_nlp/requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+tqdm
+pandas
+numpy
+scikit-learn
+seaborn
+matplotlib
+transformers

course_feedback_nlp/test.py ADDED Viewed

	@@ -0,0 +1,52 @@

+# save as predict.py
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+def predict(text):
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    model = AutoModelForSequenceClassification.from_pretrained(
+        'sentiment_model', local_files_only=True
+    ).to(device)
+    model.eval()
+    tokenizer = AutoTokenizer.from_pretrained(
+        'sentiment_model', local_files_only=True
+    )
+    inputs = tokenizer(
+        text,
+        return_tensors='pt',
+        truncation=True,
+        max_length=96,
+        padding='max_length'
+    ).to(device)
+    with torch.no_grad():
+        with torch.amp.autocast('cuda'):
+            outputs = model(**inputs)
+    probs = torch.softmax(outputs.logits, dim=1)
+    pred_class = outputs.logits.argmax(dim=1).item() + 1  # 1-5
+    confidence = probs[0][pred_class - 1].item()
+    return {
+        'rating': pred_class,
+        'confidence': f'{confidence:.1%}',
+        'all_probs': {i+1: f'{p:.1%}' for i, p in enumerate(probs[0])}
+    }
+if __name__ == '__main__':
+    tests = [
+        "This course was amazing! Best I've ever taken!",
+        "Terrible waste of time. Very boring.",
+        "It was okay, nothing special.",
+        "Good course but could be better organized.",
+        "Absolutely fantastic! Highly recommend!"
+    ]
+    for text in tests:
+        result = predict(text)
+        print(f"\n'{text[:50]}...'")
+        print(f"  → Predicted: {result['rating']} stars ({result['confidence']})")

course_feedback_nlp/train.py ADDED Viewed

	@@ -0,0 +1,862 @@

+"""
+Course Review Sentiment Model - Training Script
+VRAM Optimized for AMD 7900 XTX (24GB)
+PATCHES APPLIED:
+- Class weights to handle imbalanced data (78.8% are 5-star reviews)
+- Optimized batch_size=128 for better accuracy
+- max_length=96 for faster training
+- AMD crash protection and emergency checkpointing
+- Periodic checkpoint saving (every epoch)
+"""
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader, TensorDataset
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, confusion_matrix
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import get_linear_schedule_with_warmup
+from tqdm.auto import tqdm
+import matplotlib.pyplot as plt
+import seaborn as sns
+import os
+import time
+import gc
+import warnings
+warnings.filterwarnings('ignore')
+# ============================================================
+# AMD CRASH PROTECTION - Suppress problematic logging
+# ============================================================
+# Sets the AMD/ROCm log-level variables to '0' and exposes only GPU index 0
+# to HIP for this process.
+# NOTE(review): these are assigned after `import torch` at the top of the
+# file; presumably the GPU runtime reads them lazily on first device use -
+# confirm, or move these lines above the torch import.
+os.environ['AMD_LOG_LEVEL'] = '0'
+os.environ['ROCM_LOG_LEVEL'] = '0'
+os.environ['HIP_VISIBLE_DEVICES'] = '0'
+# ============================================================
+# CONFIGURATION
+# ============================================================
+CONFIG = {
+    # --- Paths ---
+    'data_path': 'Coursera_reviews.csv',  # CSV read with pandas; 'reviews' and 'rating' columns are used
+    'model_name': './distilbert-base-uncased',  # local model directory (loaded with local_files_only=True)
+    'output_dir': 'sentiment_model',      # best model + tokenizer are written here via save_pretrained
+    'checkpoint_dir': 'checkpoints',      # per-epoch training checkpoints (checkpoint_epoch_N.pt)
+    # --- Tokenization / batching ---
+    'max_length': 96,                     # truncation/padding length; lowered from 128 (faster, minimal accuracy loss)
+    'batch_size': 128,                    # used by all three DataLoaders; lowered from 512 (better accuracy per Run 3)
+    # --- Optimization ---
+    'epochs': 5,
+    'learning_rate': 2e-5,                # AdamW learning rate
+    'weight_decay': 0.01,                 # AdamW weight decay
+    'warmup_ratio': 0.1,                  # fraction of total steps used for linear warmup
+    # --- Data split (test_size is split off first; the validation share is
+    #     then val_size / (train_size + val_size) of the remainder) ---
+    'train_size': 0.8,
+    'val_size': 0.1,
+    'test_size': 0.1,
+    'seed': 42,                           # seeds torch and numpy, and is the random_state of both splits
+    # --- DataLoader ---
+    'num_workers': 4,
+    'pin_memory': True,
+    # --- Training features ---
+    'use_amp': True,                      # Mixed precision (autocast + GradScaler) for speed
+    'use_class_weights': True,            # weight CrossEntropyLoss by normalized inverse class frequency
+    'checkpoint_every_epoch': True,       # save a checkpoint after every epoch
+}
+# ============================================================
+# MAIN FUNCTION
+# ============================================================
+def main():
+    # ============================================================
+    # SETUP
+    # ============================================================
+    def set_seed(seed):
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+        np.random.seed(seed)
+    set_seed(CONFIG['seed'])
+    # Device
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print("=" * 70)
+    print("DEVICE INFORMATION")
+    print("=" * 70)
+    print(f"  Device: {device}")
+    if torch.cuda.is_available():
+        print(f"  GPU:    {torch.cuda.get_device_name(0)}")
+        total_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
+        print(f"  Memory: {total_mem:.2f} GB")
+    print("=" * 70)
+    print()
+    # Create directories early
+    os.makedirs(CONFIG['output_dir'], exist_ok=True)
+    os.makedirs(CONFIG['checkpoint_dir'], exist_ok=True)
+    os.makedirs('plots', exist_ok=True)
+    # ============================================================
+    # VERIFY LOCAL MODEL EXISTS
+    # ============================================================
+    print("=" * 70)
+    print("VERIFYING LOCAL MODEL")
+    print("=" * 70)
+    model_path = CONFIG['model_name']
+    if os.path.exists(model_path):
+        print(f"  ✓ Model directory found: {model_path}")
+    else:
+        print(f"  ✗ Model directory NOT found: {model_path}")
+        return
+    print("=" * 70)
+    print()
+    # ============================================================
+    # DATA LOADING
+    # ============================================================
+    print("=" * 70)
+    print("DATA LOADING")
+    print("=" * 70)
+    print("Loading data...")
+    df = pd.read_csv(CONFIG['data_path'])
+    print(f"  Raw data shape: {df.shape}")
+    # Clean data
+    df = df.dropna(subset=['reviews', 'rating'])
+    df = df[df['reviews'].str.strip() != '']
+    df['rating'] = df['rating'].astype(int)
+    df = df[df['rating'].between(1, 5)]
+    df['label'] = df['rating'] - 1
+    print(f"  Cleaned data shape: {df.shape}")
+    print(f"\n  Rating distribution:")
+    for rating, count in df['rating'].value_counts().sort_index().items():
+        pct = 100 * count / len(df)
+        bar = "█" * int(pct / 2)
+        print(f"    {rating} Star: {count:>8,} ({pct:>5.1f}%) {bar}")
+    # ============================================================
+    # CALCULATE CLASS WEIGHTS (Before deleting df!)
+    # ============================================================
+    if CONFIG['use_class_weights']:
+        print(f"\n  Calculating class weights...")
+        class_counts = df['label'].value_counts().sort_index().values
+        # Inverse frequency weighting
+        class_weights = 1.0 / class_counts
+        # Normalize so weights sum to num_classes
+        class_weights = class_weights / class_weights.sum() * len(class_counts)
+        class_weights = torch.tensor(class_weights, dtype=torch.float32)
+        print(f"  Class weights (to balance {class_counts[-1]/class_counts[0]:.1f}x imbalance):")
+        for i, (w, c) in enumerate(zip(class_weights, class_counts)):
+            print(f"    {i+1} Star: weight={w:.4f} (count={c:,})")
+    else:
+        class_weights = None
+    print("=" * 70)
+    print()
+    # ============================================================
+    # TRAIN / VALIDATION / TEST SPLIT
+    # ============================================================
+    print("=" * 70)
+    print("DATA SPLITTING")
+    print("=" * 70)
+    X_temp, X_test, y_temp, y_test = train_test_split(
+        df['reviews'].values,
+        df['label'].values,
+        test_size=CONFIG['test_size'],
+        random_state=CONFIG['seed'],
+        stratify=df['label'].values
+    )
+    relative_val_size = CONFIG['val_size'] / (CONFIG['train_size'] + CONFIG['val_size'])
+    X_train, X_val, y_train, y_val = train_test_split(
+        X_temp,
+        y_temp,
+        test_size=relative_val_size,
+        random_state=CONFIG['seed'],
+        stratify=y_temp
+    )
+    print(f"  Training samples:   {len(X_train):>10,} ({100*len(X_train)/len(df):.1f}%)")
+    print(f"  Validation samples: {len(X_val):>10,} ({100*len(X_val)/len(df):.1f}%)")
+    print(f"  Test samples:       {len(X_test):>10,} ({100*len(X_test)/len(df):.1f}%)")
+    print("=" * 70)
+    print()
+    # Now we can delete df
+    del df
+    gc.collect()
+    # ============================================================
+    # TOKENIZER
+    # ============================================================
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(
+        CONFIG['model_name'],
+        local_files_only=True
+    )
+    print(f"  ✓ Tokenizer loaded")
+    print()
+    # ============================================================
+    # PRE-TOKENIZE ALL DATA (Key optimization!)
+    # ============================================================
+    print("=" * 70)
+    print("PRE-TOKENIZING ALL DATA")
+    print("=" * 70)
+    print("  This runs once and stores tensors for fast loading...")
+    print()
+    def tokenize_batch(texts, desc="Tokenizing"):
+        """Tokenize all texts at once using batch processing"""
+        all_input_ids = []
+        all_attention_masks = []
+        batch_size = 10000  # Process 10k at a time to avoid memory issues
+        for i in tqdm(range(0, len(texts), batch_size), desc=desc):
+            batch_texts = texts[i:i+batch_size].tolist()
+            encodings = tokenizer(
+                batch_texts,
+                truncation=True,
+                padding='max_length',
+                max_length=CONFIG['max_length'],
+                return_tensors='pt'
+            )
+            all_input_ids.append(encodings['input_ids'])
+            all_attention_masks.append(encodings['attention_mask'])
+        return (
+            torch.cat(all_input_ids, dim=0),
+            torch.cat(all_attention_masks, dim=0)
+        )
+    # Tokenize train
+    print("  Tokenizing training data...")
+    train_input_ids, train_attention_masks = tokenize_batch(X_train, "  Train")
+    train_labels = torch.tensor(y_train, dtype=torch.long)
+    # Tokenize validation
+    print("  Tokenizing validation data...")
+    val_input_ids, val_attention_masks = tokenize_batch(X_val, "  Val")
+    val_labels = torch.tensor(y_val, dtype=torch.long)
+    # Tokenize test
+    print("  Tokenizing test data...")
+    test_input_ids, test_attention_masks = tokenize_batch(X_test, "  Test")
+    test_labels = torch.tensor(y_test, dtype=torch.long)
+    # Free memory
+    del X_train, X_val, X_test, y_train, y_val, y_test, X_temp, y_temp
+    gc.collect()
+    print()
+    print(f"  ✓ Train tensors: {train_input_ids.shape}")
+    print(f"  ✓ Val tensors:   {val_input_ids.shape}")
+    print(f"  ✓ Test tensors:  {test_input_ids.shape}")
+    print("=" * 70)
+    print()
+    # ============================================================
+    # CREATE TENSOR DATASETS (Fast!)
+    # ============================================================
+    train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
+    val_dataset = TensorDataset(val_input_ids, val_attention_masks, val_labels)
+    test_dataset = TensorDataset(test_input_ids, test_attention_masks, test_labels)
+    # ============================================================
+    # DATALOADERS
+    # ============================================================
+    print("Creating dataloaders...")
+    train_loader = DataLoader(
+        train_dataset,
+        batch_size=CONFIG['batch_size'],
+        shuffle=True,
+        num_workers=CONFIG['num_workers'],
+        pin_memory=CONFIG['pin_memory'],
+        persistent_workers=True  # NEW: Keep workers alive between epochs
+    )
+    val_loader = DataLoader(
+        val_dataset,
+        batch_size=CONFIG['batch_size'],
+        shuffle=False,
+        num_workers=CONFIG['num_workers'],
+        pin_memory=CONFIG['pin_memory'],
+        persistent_workers=True
+    )
+    test_loader = DataLoader(
+        test_dataset,
+        batch_size=CONFIG['batch_size'],
+        shuffle=False,
+        num_workers=CONFIG['num_workers'],
+        pin_memory=CONFIG['pin_memory'],
+        persistent_workers=True
+    )
+    print(f"  ✓ Train batches:      {len(train_loader):,}")
+    print(f"  ✓ Validation batches: {len(val_loader):,}")
+    print(f"  ✓ Test batches:       {len(test_loader):,}")
+    print()
+    # ============================================================
+    # MODEL
+    # ============================================================
+    print("Loading model...")
+    model = AutoModelForSequenceClassification.from_pretrained(
+        CONFIG['model_name'],
+        num_labels=5,
+        local_files_only=True
+    )
+    model = model.to(device)
+    total_params = sum(p.numel() for p in model.parameters())
+    print(f"  ✓ Model loaded")
+    print(f"  ✓ Total parameters: {total_params:,}")
+    print()
+    # ============================================================
+    # LOSS FUNCTION WITH CLASS WEIGHTS
+    # ============================================================
+    if CONFIG['use_class_weights'] and class_weights is not None:
+        class_weights = class_weights.to(device)
+        criterion = nn.CrossEntropyLoss(weight=class_weights)
+        print(f"  ✓ Using weighted CrossEntropyLoss")
+    else:
+        criterion = nn.CrossEntropyLoss()
+        print(f"  ✓ Using standard CrossEntropyLoss")
+    print()
+    # ============================================================
+    # OPTIMIZER & SCHEDULER
+    # ============================================================
+    optimizer = torch.optim.AdamW(
+        model.parameters(),
+        lr=CONFIG['learning_rate'],
+        weight_decay=CONFIG['weight_decay']
+    )
+    total_steps = len(train_loader) * CONFIG['epochs']
+    warmup_steps = int(total_steps * CONFIG['warmup_ratio'])
+    scheduler = get_linear_schedule_with_warmup(
+        optimizer,
+        num_warmup_steps=warmup_steps,
+        num_training_steps=total_steps
+    )
+    # Mixed Precision Scaler (for speed)
+    scaler = torch.amp.GradScaler('cuda') if CONFIG['use_amp'] else None
+    print("Optimizer & Scheduler configured:")
+    print(f"  ✓ Optimizer:        AdamW (lr={CONFIG['learning_rate']})")
+    print(f"  ✓ Total steps:      {total_steps:,}")
+    print(f"  ✓ Warmup steps:     {warmup_steps:,}")
+    print(f"  ✓ Mixed Precision:  {CONFIG['use_amp']}")
+    print()
+    # ============================================================
+    # HELPER FUNCTION: Save checkpoint
+    # ============================================================
+    def save_checkpoint(model, tokenizer, optimizer, scheduler, scaler, epoch,
+                        val_acc, history, path, is_best=False):
+        """Save a training checkpoint"""
+        checkpoint = {
+            'epoch': epoch,
+            'model_state_dict': model.state_dict(),
+            'optimizer_state_dict': optimizer.state_dict(),
+            'scheduler_state_dict': scheduler.state_dict(),
+            'scaler_state_dict': scaler.state_dict() if scaler else None,
+            'val_accuracy': val_acc,
+            'history': history,
+            'config': CONFIG,
+        }
+        torch.save(checkpoint, path)
+        if is_best:
+            model.save_pretrained(CONFIG['output_dir'])
+            tokenizer.save_pretrained(CONFIG['output_dir'])
+    # ============================================================
+    # TRAINING LOOP (with crash protection)
+    # ============================================================
+    print("=" * 70)
+    print("TRAINING STARTED")
+    print("=" * 70)
+    print(f"  Epochs:        {CONFIG['epochs']}")
+    print(f"  Batch size:    {CONFIG['batch_size']}")
+    print(f"  Max length:    {CONFIG['max_length']}")
+    print(f"  Device:        {device}")
+    print(f"  AMP:           {CONFIG['use_amp']}")
+    print(f"  Class weights: {CONFIG['use_class_weights']}")
+    print("=" * 70)
+    print()
+    best_val_acc = 0
+    history = {
+        'train_loss': [],
+        'train_acc': [],
+        'val_loss': [],
+        'val_acc': []
+    }
+    total_train_time = 0
+    # ============================================================
+    # WRAP IN TRY/EXCEPT FOR CRASH PROTECTION
+    # ============================================================
+    try:
+        for epoch in range(CONFIG['epochs']):
+            epoch_start_time = time.time()
+            # ==================== TRAINING ====================
+            model.train()
+            train_loss = 0
+            train_correct = 0
+            train_total = 0
+            train_pbar = tqdm(
+                train_loader,
+                desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [TRAIN]",
+                unit="batch",
+                ncols=120
+            )
+            for batch_idx, (input_ids, attention_mask, labels) in enumerate(train_pbar):
+                # Move to GPU with non_blocking for speed
+                input_ids = input_ids.to(device, non_blocking=True)
+                attention_mask = attention_mask.to(device, non_blocking=True)
+                labels = labels.to(device, non_blocking=True)
+                optimizer.zero_grad()
+                # Mixed precision forward pass
+                if CONFIG['use_amp']:
+                    with torch.amp.autocast('cuda'):
+                        outputs = model(
+                            input_ids=input_ids,
+                            attention_mask=attention_mask
+                        )
+                        # USE CUSTOM LOSS WITH CLASS WEIGHTS
+                        logits = outputs.logits
+                        loss = criterion(logits, labels)
+                    scaler.scale(loss).backward()
+                    scaler.unscale_(optimizer)
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+                    scaler.step(optimizer)
+                    scaler.update()
+                else:
+                    outputs = model(
+                        input_ids=input_ids,
+                        attention_mask=attention_mask
+                    )
+                    logits = outputs.logits
+                    loss = criterion(logits, labels)
+                    loss.backward()
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+                    optimizer.step()
+                scheduler.step()
+                train_loss += loss.item()
+                _, predicted = logits.max(1)
+                train_total += labels.size(0)
+                train_correct += predicted.eq(labels).sum().item()
+                running_loss = train_loss / (batch_idx + 1)
+                running_acc = 100 * train_correct / train_total
+                current_lr = scheduler.get_last_lr()[0]
+                # Show GPU memory usage
+                if torch.cuda.is_available():
+                    mem_used = torch.cuda.memory_allocated() / 1e9
+                    mem_total = torch.cuda.get_device_properties(0).total_memory / 1e9
+                train_pbar.set_postfix({
+                    'loss': f'{running_loss:.4f}',
+                    'acc': f'{running_acc:.2f}%',
+                    'lr': f'{current_lr:.2e}',
+                    'VRAM': f'{mem_used:.1f}/{mem_total:.1f}GB'
+                })
+            train_loss = train_loss / len(train_loader)
+            train_acc = 100 * train_correct / train_total
+            # ==================== VALIDATION ====================
+            model.eval()
+            val_loss = 0
+            val_correct = 0
+            val_total = 0
+            val_pbar = tqdm(
+                val_loader,
+                desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [VAL]  ",
+                unit="batch",
+                ncols=120
+            )
+            with torch.no_grad():
+                for batch_idx, (input_ids, attention_mask, labels) in enumerate(val_pbar):
+                    input_ids = input_ids.to(device, non_blocking=True)
+                    attention_mask = attention_mask.to(device, non_blocking=True)
+                    labels = labels.to(device, non_blocking=True)
+                    if CONFIG['use_amp']:
+                        with torch.amp.autocast('cuda'):
+                            outputs = model(
+                                input_ids=input_ids,
+                                attention_mask=attention_mask
+                            )
+                            logits = outputs.logits
+                            loss = criterion(logits, labels)
+                    else:
+                        outputs = model(
+                            input_ids=input_ids,
+                            attention_mask=attention_mask
+                        )
+                        logits = outputs.logits
+                        loss = criterion(logits, labels)
+                    val_loss += loss.item()
+                    _, predicted = logits.max(1)
+                    val_total += labels.size(0)
+                    val_correct += predicted.eq(labels).sum().item()
+                    running_loss = val_loss / (batch_idx + 1)
+                    running_acc = 100 * val_correct / val_total
+                    val_pbar.set_postfix({
+                        'loss': f'{running_loss:.4f}',
+                        'acc': f'{running_acc:.2f}%'
+                    })
+            val_loss = val_loss / len(val_loader)
+            val_acc = 100 * val_correct / val_total
+            history['train_loss'].append(train_loss)
+            history['train_acc'].append(train_acc)
+            history['val_loss'].append(val_loss)
+            history['val_acc'].append(val_acc)
+            epoch_time = time.time() - epoch_start_time
+            total_train_time += epoch_time
+            # ==================== EPOCH SUMMARY ====================
+            print()
+            print("─" * 70)
+            print(f"EPOCH {epoch+1}/{CONFIG['epochs']} SUMMARY")
+            print("─" * 70)
+            print(f"  {'Metric':<20} {'Train':>15} {'Validation':>15}")
+            print(f"  {'-'*20} {'-'*15} {'-'*15}")
+            print(f"  {'Loss':<20} {train_loss:>15.4f} {val_loss:>15.4f}")
+            print(f"  {'Accuracy':<20} {train_acc:>14.2f}% {val_acc:>14.2f}%")
+            print(f"  {'-'*20} {'-'*15} {'-'*15}")
+            print(f"  {'Time':<20} {epoch_time:>14.1f}s")
+            print(f"  {'Samples/sec':<20} {len(train_dataset)/epoch_time:>14.1f}")
+            # ==================== SAVE CHECKPOINT ====================
+            is_best = val_acc > best_val_acc
+            if is_best:
+                best_val_acc = val_acc
+            # Always save periodic checkpoint
+            if CONFIG['checkpoint_every_epoch']:
+                checkpoint_path = os.path.join(
+                    CONFIG['checkpoint_dir'],
+                    f'checkpoint_epoch_{epoch+1}.pt'
+                )
+                save_checkpoint(
+                    model, tokenizer, optimizer, scheduler, scaler,
+                    epoch + 1, val_acc, history, checkpoint_path, is_best=is_best
+                )
+                print(f"\n  💾 Checkpoint saved: {checkpoint_path}")
+            if is_best:
+                # Also save as best model
+                torch.save({
+                    'epoch': epoch + 1,
+                    'best_val_accuracy': best_val_acc,
+                    'config': CONFIG,
+                    'history': history
+                }, os.path.join(CONFIG['output_dir'], 'training_info.pt'))
+                print(f"  🏆 NEW BEST MODEL SAVED! Val Accuracy: {best_val_acc:.2f}%")
+            else:
+                print(f"\n  ℹ️  Best Val Accuracy so far: {best_val_acc:.2f}%")
+            print("─" * 70)
+            print()
+    except Exception as e:
+        # ============================================================
+        # EMERGENCY SAVE ON CRASH
+        # ============================================================
+        print()
+        print("!" * 70)
+        print("⚠️  ERROR OCCURRED - SAVING EMERGENCY CHECKPOINT")
+        print("!" * 70)
+        print(f"  Error: {e}")
+        emergency_dir = CONFIG['output_dir'] + '_emergency'
+        os.makedirs(emergency_dir, exist_ok=True)
+        try:
+            model.save_pretrained(emergency_dir)
+            tokenizer.save_pretrained(emergency_dir)
+            torch.save({
+                'epoch': epoch + 1 if 'epoch' in dir() else 0,
+                'history': history,
+                'config': CONFIG,
+                'error': str(e)
+            }, os.path.join(emergency_dir, 'emergency_checkpoint.pt'))
+            print(f"  ✓ Emergency checkpoint saved to: {emergency_dir}")
+        except Exception as save_error:
+            print(f"  ✗ Failed to save emergency checkpoint: {save_error}")
+        print("!" * 70)
+        raise  # Re-raise the exception
+    print("=" * 70)
+    print("TRAINING COMPLETE")
+    print("=" * 70)
+    print(f"  Total training time: {total_train_time/60:.1f} minutes")
+    print(f"  Best Val Accuracy:   {best_val_acc:.2f}%")
+    print("=" * 70)
+    print()
+    # ============================================================
+    # FINAL TEST EVALUATION
+    # ============================================================
+    print("=" * 70)
+    print("FINAL TEST EVALUATION")
+    print("=" * 70)
+    print("Loading best model...")
+    model = AutoModelForSequenceClassification.from_pretrained(
+        CONFIG['output_dir'],
+        local_files_only=True
+    )
+    model = model.to(device)
+    model.eval()
+    # Use standard loss for test evaluation (no class weights)
+    test_criterion = nn.CrossEntropyLoss()
+    test_loss = 0
+    test_correct = 0
+    test_total = 0
+    all_preds = []
+    all_labels = []
+    test_pbar = tqdm(test_loader, desc="Testing", unit="batch", ncols=120)
+    with torch.no_grad():
+        for batch_idx, (input_ids, attention_mask, labels) in enumerate(test_pbar):
+            input_ids = input_ids.to(device, non_blocking=True)
+            attention_mask = attention_mask.to(device, non_blocking=True)
+            labels = labels.to(device, non_blocking=True)
+            if CONFIG['use_amp']:
+                with torch.amp.autocast('cuda'):
+                    outputs = model(
+                        input_ids=input_ids,
+                        attention_mask=attention_mask
+                    )
+                    logits = outputs.logits
+                    loss = test_criterion(logits, labels)
+            else:
+                outputs = model(
+                    input_ids=input_ids,
+                    attention_mask=attention_mask
+                )
+                logits = outputs.logits
+                loss = test_criterion(logits, labels)
+            test_loss += loss.item()
+            _, predicted = logits.max(1)
+            test_total += labels.size(0)
+            test_correct += predicted.eq(labels).sum().item()
+            all_preds.extend(predicted.cpu().numpy())
+            all_labels.extend(labels.cpu().numpy())
+            test_pbar.set_postfix({
+                'loss': f'{test_loss/(batch_idx+1):.4f}',
+                'acc': f'{100*test_correct/test_total:.2f}%'
+            })
+    test_loss = test_loss / len(test_loader)
+    test_acc = 100 * test_correct / test_total
+    all_preds = np.array(all_preds)
+    all_labels = np.array(all_labels)
+    within_one = np.mean(np.abs(all_preds - all_labels) <= 1) * 100
+    print()
+    print("─" * 70)
+    print("TEST RESULTS")
+    print("─" * 70)
+    print(f"  Test Loss:              {test_loss:.4f}")
+    print(f"  Test Accuracy:          {test_acc:.2f}%")
+    print(f"  Within ±1 Star:         {within_one:.2f}%")
+    print("─" * 70)
+    print()
+    print("CLASSIFICATION REPORT")
+    print("─" * 70)
+    report = classification_report(
+        all_labels,
+        all_preds,
+        target_names=['1 Star', '2 Star', '3 Star', '4 Star', '5 Star'],
+        digits=3,
+        output_dict=True
+    )
+    print(classification_report(
+        all_labels,
+        all_preds,
+        target_names=['1 Star', '2 Star', '3 Star', '4 Star', '5 Star'],
+        digits=3
+    ))
+    # ============================================================
+    # PLOTS
+    # ============================================================
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    epochs_range = range(1, len(history['train_loss']) + 1)
+    axes[0].plot(epochs_range, history['train_loss'], 'b-o', label='Train', linewidth=2)
+    axes[0].plot(epochs_range, history['val_loss'], 'r-o', label='Val', linewidth=2)
+    axes[0].set_xlabel('Epoch')
+    axes[0].set_ylabel('Loss')
+    axes[0].set_title('Loss (with Class Weights)' if CONFIG['use_class_weights'] else 'Loss')
+    axes[0].legend()
+    axes[0].grid(True, alpha=0.3)
+    axes[1].plot(epochs_range, history['train_acc'], 'b-o', label='Train', linewidth=2)
+    axes[1].plot(epochs_range, history['val_acc'], 'r-o', label='Val', linewidth=2)
+    axes[1].set_xlabel('Epoch')
+    axes[1].set_ylabel('Accuracy (%)')
+    axes[1].set_title('Accuracy')
+    axes[1].legend()
+    axes[1].grid(True, alpha=0.3)
+    plt.tight_layout()
+    plt.savefig('plots/training_history.png', dpi=150)
+    print("✓ Saved: plots/training_history.png")
+    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
+    cm = confusion_matrix(all_labels, all_preds)
+    labels_names = ['1 Star', '2 Star', '3 Star', '4 Star', '5 Star']
+    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
+                xticklabels=labels_names, yticklabels=labels_names, ax=axes[0])
+    axes[0].set_xlabel('Predicted')
+    axes[0].set_ylabel('Actual')
+    axes[0].set_title('Confusion Matrix (Counts)')
+    cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
+    sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
+                xticklabels=labels_names, yticklabels=labels_names, ax=axes[1])
+    axes[1].set_xlabel('Predicted')
+    axes[1].set_ylabel('Actual')
+    axes[1].set_title('Confusion Matrix (Normalized)')
+    plt.tight_layout()
+    plt.savefig('plots/confusion_matrix.png', dpi=150)
+    print("✓ Saved: plots/confusion_matrix.png")
+    # ============================================================
+    # PER-CLASS RECALL COMPARISON PLOT (NEW!)
+    # ============================================================
+    fig, ax = plt.subplots(figsize=(10, 6))
+    classes = ['1 Star', '2 Star', '3 Star', '4 Star', '5 Star']
+    recalls = [report[c]['recall'] * 100 for c in classes]
+    bars = ax.bar(classes, recalls, color=['#ff6b6b', '#ffa94d', '#ffd43b', '#69db7c', '#4dabf7'])
+    ax.axhline(y=50, color='red', linestyle='--', alpha=0.5, label='50% threshold')
+    ax.axhline(y=75, color='green', linestyle='--', alpha=0.5, label='75% threshold')
+    for bar, recall in zip(bars, recalls):
+        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
+                f'{recall:.1f}%', ha='center', va='bottom', fontsize=11)
+    ax.set_ylabel('Recall (%)')
+    ax.set_title('Per-Class Recall (Higher = Better at detecting this class)')
+    ax.set_ylim(0, 105)
+    ax.legend()
+    ax.grid(True, alpha=0.3, axis='y')
+    plt.tight_layout()
+    plt.savefig('plots/per_class_recall.png', dpi=150)
+    print("✓ Saved: plots/per_class_recall.png")
+    # ============================================================
+    # SAVE RESULTS
+    # ============================================================
+    results = {
+        'best_val_accuracy': best_val_acc,
+        'test_accuracy': test_acc,
+        'test_within_one': within_one,
+        'history': history,
+        'config': CONFIG,
+        'train_time_minutes': total_train_time / 60,
+        'classification_report': report,
+        'confusion_matrix': cm.tolist()
+    }
+    torch.save(results, os.path.join(CONFIG['output_dir'], 'results.pt'))
+    print()
+    print("=" * 70)
+    print("🎉 ALL DONE!")
+    print("=" * 70)
+    print(f"  Best Val Accuracy: {best_val_acc:.2f}%")
+    print(f"  Test Accuracy:     {test_acc:.2f}%")
+    print(f"  Within ±1 Star:    {within_one:.2f}%")
+    print(f"  Training Time:     {total_train_time/60:.1f} minutes")
+    print()
+    print("  Per-Class Recall:")
+    for c in classes:
+        recall = report[c]['recall'] * 100
+        indicator = "✓" if recall >= 60 else "⚠️" if recall >= 40 else "✗"
+        print(f"    {indicator} {c}: {recall:.1f}%")
+    print("=" * 70)
+# ============================================================
+# ENTRY POINT
+# ============================================================
+# Start training only when this file is executed as a script, not when it is
+# imported as a module.
+if __name__ == '__main__':
+    main()

course_feedback_nlp/train_3_classes.py ADDED Viewed

	@@ -0,0 +1,872 @@

+"""
+Student Feedback Sentiment Model - Training Script
+==================================================
+Optimized for Teacher/Agent use case:
+- 3 classes: Negative, Neutral, Positive
+- High recall on negative feedback (don't miss struggling students)
+- Confidence scores for uncertainty
+- Fast inference for agent integration
+FIXED: save_checkpoint now properly preserves model config.json
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader, TensorDataset
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, confusion_matrix
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
+from transformers import get_linear_schedule_with_warmup
+from tqdm.auto import tqdm
+import matplotlib.pyplot as plt
+import seaborn as sns
+import os
+import time
+import gc
+import json
+import warnings
+warnings.filterwarnings('ignore')
+# ============================================================
+# AMD CRASH PROTECTION
+# ============================================================
+# NOTE(review): these are assigned after `import torch` above; presumably the
+# ROCm/HIP runtime only reads them on first GPU use - confirm.
+os.environ['AMD_LOG_LEVEL'] = '0'  # presumably the quietest AMD runtime log level
+os.environ['ROCM_LOG_LEVEL'] = '0'  # presumably the quietest ROCm log level
+os.environ['HIP_VISIBLE_DEVICES'] = '0'  # presumably exposes only GPU index 0 to HIP
+# ============================================================
+# CONFIGURATION - OPTIMIZED FOR TEACHER USE CASE
+# ============================================================
+# Settings for the whole run: read directly by main() and handed to
+# get_scheduler() as its `config` argument.
+CONFIG = {
+    # ==================== DATA ====================
+    # CSV loaded with pandas; main() uses its 'reviews' and 'rating' columns
+    'data_path': 'Coursera_reviews.csv',
+    # Local directory; tokenizer and model are loaded with local_files_only=True
+    'model_name': './distilbert-base-uncased',
+    # Directory that receives the best model/tokenizer via save_pretrained
+    'output_dir': 'teacher_sentiment_model',
+    'checkpoint_dir': 'checkpoints_teacher',
+    # ==================== CLASS MAPPING ====================
+    # Map 5-star ratings to 3 classes
+    'num_classes': 3,
+    'class_names': ['Negative', 'Neutral', 'Positive'],
+    # Keys are zero-based star labels (rating - 1); values index 'class_names'
+    'class_mapping': {
+        0: 0,  # 1-star → Negative (0)
+        1: 0,  # 2-star → Negative (0)
+        2: 1,  # 3-star → Neutral (1)
+        3: 2,  # 4-star → Positive (2)
+        4: 2,  # 5-star → Positive (2)
+    },
+    # ==================== TOKENIZATION ====================
+    # Every review is truncated/padded to exactly this many tokens
+    'max_length': 96,
+    # ==================== TRAINING ====================
+    # The optimizer steps once every 'gradient_accumulation_steps' batches,
+    # so the effective batch size is 128 * 2 = 256
+    'batch_size': 128,
+    'gradient_accumulation_steps': 2,
+    'epochs': 7,
+    'learning_rate': 2e-5,
+    'weight_decay': 0.01,
+    # Fraction of the total optimizer steps spent on linear warmup
+    'warmup_ratio': 0.06,
+    # Gradient-norm clipping threshold applied before each optimizer step
+    'max_grad_norm': 1.0,
+    # ==================== SCHEDULER ====================
+    # 'cosine' selects warmup + cosine decay; any other value selects
+    # warmup + linear decay (see get_scheduler)
+    'scheduler_type': 'cosine',
+    # Lower bound of the cosine LR multiplier (relative to 'learning_rate')
+    'cosine_min_lr_ratio': 0.01,
+    # ==================== LOSS FUNCTION ====================
+    # NOTE(review): the visible part of main() always builds FocalLoss and
+    # does not read 'loss_type' - confirm whether this key is used anywhere
+    'loss_type': 'focal',  # Focal loss to focus on hard examples
+    'focal_gamma': 2.0,
+    'label_smoothing': 0.05,  # Light smoothing for calibration
+    # ==================== CLASS IMBALANCE ====================
+    # IMPORTANT: Weight negative class higher - we don't want to miss struggling students!
+    'use_class_weights': True,
+    # Exponent applied to the normalized inverse-frequency weights
+    'class_weight_power': 0.7,  # Moderate-high weighting for minorities
+    # Multiplier on the class-0 weight, applied before the final re-normalization
+    'negative_class_boost': 1.5,  # Extra boost for negative class (teacher priority)
+    # ==================== EARLY STOPPING ====================
+    'early_stopping': True,
+    'early_stopping_patience': 3,
+    'early_stopping_metric': 'val_loss',
+    # ==================== HARDWARE ====================
+    'seed': 42,
+    'num_workers': 4,
+    'pin_memory': True,
+    # Enables torch.amp autocast and a GradScaler in the training loop
+    'use_amp': True,
+    # ==================== CHECKPOINTING ====================
+    'checkpoint_every_epoch': True,
+    'save_total_limit': 3,
+    # ==================== DATA SPLIT ====================
+    # The test split is taken first; validation is then carved out of the
+    # remainder as val_size / (train_size + val_size)
+    'train_size': 0.8,
+    'val_size': 0.1,
+    'test_size': 0.1,
+}
+# ============================================================
+# CUSTOM LOSS FUNCTIONS
+# ============================================================
+class FocalLoss(nn.Module):
+    """
+    Focal Loss with label smoothing.
+    Focuses training on hard-to-classify examples.
+    """
+    def __init__(self, num_classes=3, gamma=2.0, alpha=None, label_smoothing=0.0):
+        super().__init__()
+        self.num_classes = num_classes
+        self.gamma = gamma
+        self.label_smoothing = label_smoothing
+        if alpha is not None:
+            self.register_buffer('alpha', alpha)
+        else:
+            self.alpha = None
+    def forward(self, logits, targets):
+        probs = F.softmax(logits, dim=-1)
+        pt = probs.gather(1, targets.unsqueeze(1)).squeeze(1)
+        # Focal weight
+        focal_weight = (1 - pt) ** self.gamma
+        # Cross entropy with optional label smoothing
+        if self.label_smoothing > 0:
+            confidence = 1.0 - self.label_smoothing
+            smooth_value = self.label_smoothing / (self.num_classes - 1)
+            one_hot = torch.zeros_like(logits).scatter_(1, targets.unsqueeze(1), 1)
+            smooth_targets = one_hot * confidence + (1 - one_hot) * smooth_value
+            log_probs = F.log_softmax(logits, dim=-1)
+            ce = -(smooth_targets * log_probs).sum(dim=-1)
+        else:
+            ce = F.cross_entropy(logits, targets, reduction='none')
+        loss = focal_weight * ce
+        if self.alpha is not None:
+            alpha_t = self.alpha[targets]
+            loss = alpha_t * loss
+        return loss.mean()
+# ============================================================
+# UTILITY FUNCTIONS
+# ============================================================
+def set_seed(seed):
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    np.random.seed(seed)
+    torch.backends.cudnn.deterministic = True
+def get_scheduler(optimizer, scheduler_type, total_steps, warmup_steps, config):
+    if scheduler_type == 'cosine':
+        min_lr_ratio = config.get('cosine_min_lr_ratio', 0.01)
+        def lr_lambda(current_step):
+            if current_step < warmup_steps:
+                return float(current_step) / float(max(1, warmup_steps))
+            progress = float(current_step - warmup_steps) / float(max(1, total_steps - warmup_steps))
+            return max(min_lr_ratio, 0.5 * (1.0 + np.cos(np.pi * progress)))
+        return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
+    else:
+        return get_linear_schedule_with_warmup(optimizer, warmup_steps, total_steps)
+def save_checkpoint(model, tokenizer, optimizer, scheduler, scaler, epoch,
+                    val_acc, val_loss, history, config, path, is_best=False):
+    """
+    Save training checkpoint.
+    FIXED: Now saves training_config.json separately instead of overwriting
+    the model's config.json (which needs model_type for loading).
+    """
+    checkpoint = {
+        'epoch': epoch,
+        'model_state_dict': model.state_dict(),
+        'optimizer_state_dict': optimizer.state_dict(),
+        'scheduler_state_dict': scheduler.state_dict(),
+        'scaler_state_dict': scaler.state_dict() if scaler else None,
+        'val_accuracy': val_acc,
+        'val_loss': val_loss,
+        'history': history,
+        'config': config,
+    }
+    torch.save(checkpoint, path)
+    if is_best:
+        # Save model and tokenizer - this creates the correct config.json with model_type
+        model.save_pretrained(config['output_dir'])
+        tokenizer.save_pretrained(config['output_dir'])
+        # FIXED: Save our custom training config to a SEPARATE file
+        # DO NOT overwrite the model's config.json!
+        training_config_path = os.path.join(config['output_dir'], 'training_config.json')
+        training_config = {
+            'num_classes': config['num_classes'],
+            'class_names': config['class_names'],
+            'class_mapping': {str(k): v for k, v in config['class_mapping'].items()},  # JSON needs string keys
+            'max_length': config['max_length'],
+        }
+        with open(training_config_path, 'w') as f:
+            json.dump(training_config, f, indent=2)
+        # Also update the model's config.json with our label mappings (properly!)
+        model_config = AutoConfig.from_pretrained(config['output_dir'])
+        model_config.num_labels = config['num_classes']
+        model_config.id2label = {i: name for i, name in enumerate(config['class_names'])}
+        model_config.label2id = {name: i for i, name in enumerate(config['class_names'])}
+        model_config.save_pretrained(config['output_dir'])
+def cleanup_old_checkpoints(checkpoint_dir, save_total_limit):
+    if save_total_limit is None or save_total_limit <= 0:
+        return
+    checkpoints = sorted([
+        f for f in os.listdir(checkpoint_dir)
+        if f.startswith('checkpoint_epoch_') and f.endswith('.pt')
+    ])
+    while len(checkpoints) > save_total_limit:
+        oldest = checkpoints.pop(0)
+        os.remove(os.path.join(checkpoint_dir, oldest))
+def tokenize_batch(texts, tokenizer, max_length, desc="Tokenizing"):
+    all_input_ids = []
+    all_attention_masks = []
+    batch_size = 10000
+    for i in tqdm(range(0, len(texts), batch_size), desc=desc):
+        batch_texts = texts[i:i+batch_size].tolist()
+        encodings = tokenizer(
+            batch_texts,
+            truncation=True,
+            padding='max_length',
+            max_length=max_length,
+            return_tensors='pt'
+        )
+        all_input_ids.append(encodings['input_ids'])
+        all_attention_masks.append(encodings['attention_mask'])
+    return torch.cat(all_input_ids, dim=0), torch.cat(all_attention_masks, dim=0)
+# ============================================================
+# MAIN FUNCTION
+# ============================================================
+def main():
+    set_seed(CONFIG['seed'])
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print("=" * 70)
+    print("STUDENT FEEDBACK SENTIMENT MODEL")
+    print("Optimized for Teacher/Agent Use Case")
+    print("=" * 70)
+    print()
+    print("TARGET CLASSES:")
+    print("  🔴 Negative (1-2 stars) → 'Needs Attention'")
+    print("  🟡 Neutral  (3 stars)   → 'Mixed/Unclear'")
+    print("  🟢 Positive (4-5 stars) → 'Satisfied'")
+    print()
+    print(f"Device: {device}")
+    if torch.cuda.is_available():
+        print(f"GPU: {torch.cuda.get_device_name(0)}")
+    print("=" * 70)
+    print()
+    # Create directories
+    os.makedirs(CONFIG['output_dir'], exist_ok=True)
+    os.makedirs(CONFIG['checkpoint_dir'], exist_ok=True)
+    os.makedirs('plots', exist_ok=True)
+    # ============================================================
+    # DATA LOADING & PREPROCESSING
+    # ============================================================
+    print("LOADING DATA")
+    print("-" * 70)
+    df = pd.read_csv(CONFIG['data_path'])
+    print(f"Raw data: {len(df):,} samples")
+    # Clean
+    df = df.dropna(subset=['reviews', 'rating'])
+    df = df[df['reviews'].str.strip() != '']
+    df['rating'] = df['rating'].astype(int)
+    df = df[df['rating'].between(1, 5)]
+    # Original 5-class labels
+    df['label_5class'] = df['rating'] - 1
+    # Map to 3 classes
+    df['label'] = df['label_5class'].map(CONFIG['class_mapping'])
+    print(f"Cleaned data: {len(df):,} samples")
+    print()
+    # Show original distribution
+    print("Original 5-class distribution:")
+    for rating in range(1, 6):
+        count = (df['rating'] == rating).sum()
+        pct = 100 * count / len(df)
+        print(f"  {rating} Star: {count:>8,} ({pct:>5.1f}%)")
+    print()
+    # Show new 3-class distribution
+    print("New 3-class distribution:")
+    class_counts_3 = []
+    for label, name in enumerate(CONFIG['class_names']):
+        count = (df['label'] == label).sum()
+        pct = 100 * count / len(df)
+        class_counts_3.append(count)
+        emoji = ['🔴', '🟡', '🟢'][label]
+        print(f"  {emoji} {name}: {count:>8,} ({pct:>5.1f}%)")
+    print()
+    # ============================================================
+    # CALCULATE CLASS WEIGHTS
+    # ============================================================
+    if CONFIG['use_class_weights']:
+        print("Calculating class weights...")
+        class_counts = np.array(class_counts_3)
+        # Inverse frequency
+        weights = 1.0 / class_counts
+        weights = weights / weights.sum() * len(weights)
+        # Apply power scaling
+        power = CONFIG['class_weight_power']
+        weights = weights ** power
+        weights = weights / weights.sum() * len(weights)
+        # Extra boost for negative class (teacher priority!)
+        negative_boost = CONFIG.get('negative_class_boost', 1.0)
+        weights[0] = weights[0] * negative_boost
+        # Re-normalize
+        weights = weights / weights.sum() * len(weights)
+        class_weights = torch.tensor(weights, dtype=torch.float32)
+        print("Class weights (higher = more important):")
+        for i, (name, w) in enumerate(zip(CONFIG['class_names'], class_weights)):
+            bar = "█" * int(w * 15)
+            boost_note = " ← BOOSTED (teacher priority)" if i == 0 else ""
+            print(f"  {name}: {w:.4f} {bar}{boost_note}")
+        print()
+    else:
+        class_weights = None
+    # ============================================================
+    # TRAIN / VAL / TEST SPLIT
+    # ============================================================
+    print("SPLITTING DATA")
+    print("-" * 70)
+    X_temp, X_test, y_temp, y_test = train_test_split(
+        df['reviews'].values, df['label'].values,
+        test_size=CONFIG['test_size'],
+        random_state=CONFIG['seed'],
+        stratify=df['label'].values
+    )
+    val_ratio = CONFIG['val_size'] / (CONFIG['train_size'] + CONFIG['val_size'])
+    X_train, X_val, y_train, y_val = train_test_split(
+        X_temp, y_temp,
+        test_size=val_ratio,
+        random_state=CONFIG['seed'],
+        stratify=y_temp
+    )
+    print(f"Train: {len(X_train):,} | Val: {len(X_val):,} | Test: {len(X_test):,}")
+    print()
+    del df
+    gc.collect()
+    # ============================================================
+    # TOKENIZATION
+    # ============================================================
+    print("TOKENIZATION")
+    print("-" * 70)
+    tokenizer = AutoTokenizer.from_pretrained(CONFIG['model_name'], local_files_only=True)
+    train_ids, train_masks = tokenize_batch(X_train, tokenizer, CONFIG['max_length'], "Train")
+    val_ids, val_masks = tokenize_batch(X_val, tokenizer, CONFIG['max_length'], "Val")
+    test_ids, test_masks = tokenize_batch(X_test, tokenizer, CONFIG['max_length'], "Test")
+    train_labels = torch.tensor(y_train, dtype=torch.long)
+    val_labels = torch.tensor(y_val, dtype=torch.long)
+    test_labels = torch.tensor(y_test, dtype=torch.long)
+    del X_train, X_val, X_test, y_train, y_val, y_test, X_temp, y_temp
+    gc.collect()
+    print()
+    # ============================================================
+    # DATALOADERS
+    # ============================================================
+    train_dataset = TensorDataset(train_ids, train_masks, train_labels)
+    val_dataset = TensorDataset(val_ids, val_masks, val_labels)
+    test_dataset = TensorDataset(test_ids, test_masks, test_labels)
+    train_loader = DataLoader(
+        train_dataset, batch_size=CONFIG['batch_size'], shuffle=True,
+        num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
+        persistent_workers=True, drop_last=True
+    )
+    val_loader = DataLoader(
+        val_dataset, batch_size=CONFIG['batch_size'], shuffle=False,
+        num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
+        persistent_workers=True
+    )
+    test_loader = DataLoader(
+        test_dataset, batch_size=CONFIG['batch_size'], shuffle=False,
+        num_workers=CONFIG['num_workers'], pin_memory=CONFIG['pin_memory'],
+        persistent_workers=True
+    )
+    print(f"Train batches: {len(train_loader):,}")
+    print()
+    # ============================================================
+    # MODEL (3 classes!)
+    # ============================================================
+    print("LOADING MODEL")
+    print("-" * 70)
+    model = AutoModelForSequenceClassification.from_pretrained(
+        CONFIG['model_name'],
+        num_labels=CONFIG['num_classes'],  # 3 classes!
+        local_files_only=True
+    )
+    model = model.to(device)
+    print(f"Model loaded with {CONFIG['num_classes']} output classes")
+    print()
+    # ============================================================
+    # LOSS FUNCTION
+    # ============================================================
+    if class_weights is not None:
+        class_weights = class_weights.to(device)
+    criterion = FocalLoss(
+        num_classes=CONFIG['num_classes'],
+        gamma=CONFIG['focal_gamma'],
+        alpha=class_weights,
+        label_smoothing=CONFIG['label_smoothing']
+    )
+    print(f"Loss: Focal (γ={CONFIG['focal_gamma']}) + Label Smoothing ({CONFIG['label_smoothing']})")
+    print()
+    # ============================================================
+    # OPTIMIZER & SCHEDULER
+    # ============================================================
+    optimizer = torch.optim.AdamW(
+        model.parameters(),
+        lr=CONFIG['learning_rate'],
+        weight_decay=CONFIG['weight_decay']
+    )
+    accum_steps = CONFIG['gradient_accumulation_steps']
+    steps_per_epoch = len(train_loader) // accum_steps
+    total_steps = steps_per_epoch * CONFIG['epochs']
+    warmup_steps = int(total_steps * CONFIG['warmup_ratio'])
+    scheduler = get_scheduler(optimizer, CONFIG['scheduler_type'], total_steps, warmup_steps, CONFIG)
+    scaler = torch.amp.GradScaler('cuda') if CONFIG['use_amp'] else None
+    # ============================================================
+    # TRAINING LOOP
+    # ============================================================
+    print("=" * 70)
+    print("TRAINING")
+    print("=" * 70)
+    print()
+    best_val_acc = 0
+    best_val_loss = float('inf')
+    patience_counter = 0
+    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'lr': []}
+    total_train_time = 0
+    try:
+        for epoch in range(CONFIG['epochs']):
+            epoch_start = time.time()
+            # === TRAIN ===
+            model.train()
+            train_loss, train_correct, train_total = 0, 0, 0
+            optimizer.zero_grad()
+            pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [Train]", ncols=120)
+            for batch_idx, (input_ids, attention_mask, labels) in enumerate(pbar):
+                input_ids = input_ids.to(device, non_blocking=True)
+                attention_mask = attention_mask.to(device, non_blocking=True)
+                labels = labels.to(device, non_blocking=True)
+                if CONFIG['use_amp']:
+                    with torch.amp.autocast('cuda'):
+                        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+                        loss = criterion(outputs.logits, labels) / accum_steps
+                    scaler.scale(loss).backward()
+                else:
+                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+                    loss = criterion(outputs.logits, labels) / accum_steps
+                    loss.backward()
+                if (batch_idx + 1) % accum_steps == 0:
+                    if CONFIG['use_amp']:
+                        scaler.unscale_(optimizer)
+                        torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])
+                        scaler.step(optimizer)
+                        scaler.update()
+                    else:
+                        torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG['max_grad_norm'])
+                        optimizer.step()
+                    scheduler.step()
+                    optimizer.zero_grad()
+                train_loss += loss.item() * accum_steps
+                _, pred = outputs.logits.max(1)
+                train_total += labels.size(0)
+                train_correct += pred.eq(labels).sum().item()
+                pbar.set_postfix({
+                    'loss': f'{train_loss/(batch_idx+1):.4f}',
+                    'acc': f'{100*train_correct/train_total:.1f}%'
+                })
+            train_loss /= len(train_loader)
+            train_acc = 100 * train_correct / train_total
+            # === VALIDATION ===
+            model.eval()
+            val_loss, val_correct, val_total = 0, 0, 0
+            with torch.no_grad():
+                for input_ids, attention_mask, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [Val]", ncols=120):
+                    input_ids = input_ids.to(device, non_blocking=True)
+                    attention_mask = attention_mask.to(device, non_blocking=True)
+                    labels = labels.to(device, non_blocking=True)
+                    if CONFIG['use_amp']:
+                        with torch.amp.autocast('cuda'):
+                            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+                            loss = criterion(outputs.logits, labels)
+                    else:
+                        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+                        loss = criterion(outputs.logits, labels)
+                    val_loss += loss.item()
+                    _, pred = outputs.logits.max(1)
+                    val_total += labels.size(0)
+                    val_correct += pred.eq(labels).sum().item()
+            val_loss /= len(val_loader)
+            val_acc = 100 * val_correct / val_total
+            epoch_time = time.time() - epoch_start
+            total_train_time += epoch_time
+            history['train_loss'].append(train_loss)
+            history['train_acc'].append(train_acc)
+            history['val_loss'].append(val_loss)
+            history['val_acc'].append(val_acc)
+            history['lr'].append(scheduler.get_last_lr()[0])
+            # === EPOCH SUMMARY ===
+            print()
+            print(f"  Epoch {epoch+1}: Train Loss={train_loss:.4f}, Acc={train_acc:.2f}% | Val Loss={val_loss:.4f}, Acc={val_acc:.2f}% | Time={epoch_time:.0f}s")
+            # Checkpointing
+            is_best = val_loss < best_val_loss
+            if is_best:
+                best_val_loss = val_loss
+                patience_counter = 0
+            else:
+                patience_counter += 1
+            if val_acc > best_val_acc:
+                best_val_acc = val_acc
+            if CONFIG['checkpoint_every_epoch']:
+                ckpt_path = os.path.join(CONFIG['checkpoint_dir'], f'checkpoint_epoch_{epoch+1}.pt')
+                save_checkpoint(model, tokenizer, optimizer, scheduler, scaler,
+                                epoch+1, val_acc, val_loss, history, CONFIG, ckpt_path, is_best)
+                cleanup_old_checkpoints(CONFIG['checkpoint_dir'], CONFIG['save_total_limit'])
+            if is_best:
+                print(f"  🏆 New best model saved!")
+            if CONFIG['early_stopping'] and patience_counter >= CONFIG['early_stopping_patience']:
+                print(f"\n  🛑 Early stopping after {epoch+1} epochs")
+                break
+            print()
+    except Exception as e:
+        print(f"\n⚠️ Error: {e}")
+        emergency_dir = CONFIG['output_dir'] + '_emergency'
+        os.makedirs(emergency_dir, exist_ok=True)
+        model.save_pretrained(emergency_dir)
+        tokenizer.save_pretrained(emergency_dir)
+        raise
+    print("=" * 70)
+    print(f"TRAINING COMPLETE - {total_train_time/60:.1f} minutes")
+    print("=" * 70)
+    print()
+    # ============================================================
+    # FINAL TEST EVALUATION
+    # ============================================================
+    print("FINAL TEST EVALUATION")
+    print("-" * 70)
+    # Load best model (now works without fix!)
+    model = AutoModelForSequenceClassification.from_pretrained(
+        CONFIG['output_dir'], local_files_only=True
+    )
+    model = model.to(device)
+    model.eval()
+    all_preds, all_labels, all_probs = [], [], []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in tqdm(test_loader, desc="Testing"):
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            if CONFIG['use_amp']:
+                with torch.amp.autocast('cuda'):
+                    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+            else:
+                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+            probs = F.softmax(outputs.logits, dim=-1)
+            _, preds = outputs.logits.max(1)
+            all_preds.extend(preds.cpu().numpy())
+            all_labels.extend(labels.numpy())
+            all_probs.extend(probs.cpu().numpy())
+    all_preds = np.array(all_preds)
+    all_labels = np.array(all_labels)
+    all_probs = np.array(all_probs)
+    test_acc = 100 * (all_preds == all_labels).mean()
+    print()
+    print(f"Test Accuracy: {test_acc:.2f}%")
+    print()
+    # Classification Report
+    print("CLASSIFICATION REPORT")
+    print("-" * 70)
+    report = classification_report(
+        all_labels, all_preds,
+        target_names=CONFIG['class_names'],
+        digits=3,
+        output_dict=True
+    )
+    print(classification_report(
+        all_labels, all_preds,
+        target_names=CONFIG['class_names'],
+        digits=3
+    ))
+    # ============================================================
+    # TEACHER-FOCUSED METRICS
+    # ============================================================
+    print()
+    print("=" * 70)
+    print("📊 TEACHER-FOCUSED METRICS")
+    print("=" * 70)
+    print()
+    # Negative class recall (MOST IMPORTANT for teachers)
+    negative_recall = report['Negative']['recall'] * 100
+    negative_precision = report['Negative']['precision'] * 100
+    print(f"  🔴 NEGATIVE FEEDBACK DETECTION (Struggling Students):")
+    print(f"     Recall:    {negative_recall:.1f}% ← {negative_recall:.0f}% of struggling students caught")
+    print(f"     Precision: {negative_precision:.1f}% ← {negative_precision:.0f}% of flags are real issues")
+    print()
+    # False negative analysis (missed struggling students)
+    false_negatives = ((all_labels == 0) & (all_preds != 0)).sum()
+    total_negatives = (all_labels == 0).sum()
+    missed_pct = 100 * false_negatives / total_negatives
+    print(f"  ⚠️  MISSED STRUGGLING STUDENTS:")
+    print(f"     {false_negatives:,} of {total_negatives:,} negative cases missed ({missed_pct:.1f}%)")
+    print()
+    # Confidence analysis
+    pred_confidence = all_probs.max(axis=1)
+    low_confidence = (pred_confidence < 0.7).sum()
+    low_conf_pct = 100 * low_confidence / len(pred_confidence)
+    print(f"  🤔 UNCERTAIN PREDICTIONS (confidence < 70%):")
+    print(f"     {low_confidence:,} of {len(pred_confidence):,} predictions ({low_conf_pct:.1f}%)")
+    print(f"     → These should be flagged for manual review")
+    print()
+    # ============================================================
+    # PLOTS
+    # ============================================================
+    # Confusion Matrix
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    cm = confusion_matrix(all_labels, all_preds)
+    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
+                xticklabels=CONFIG['class_names'],
+                yticklabels=CONFIG['class_names'], ax=axes[0])
+    axes[0].set_xlabel('Predicted')
+    axes[0].set_ylabel('Actual')
+    axes[0].set_title('Confusion Matrix (Counts)')
+    cm_norm = cm.astype(float) / cm.sum(axis=1, keepdims=True)
+    sns.heatmap(cm_norm, annot=True, fmt='.1%', cmap='Blues',
+                xticklabels=CONFIG['class_names'],
+                yticklabels=CONFIG['class_names'], ax=axes[1])
+    axes[1].set_xlabel('Predicted')
+    axes[1].set_ylabel('Actual')
+    axes[1].set_title('Confusion Matrix (Recall)')
+    plt.tight_layout()
+    plt.savefig('plots/confusion_matrix_3class.png', dpi=150)
+    print("✓ Saved: plots/confusion_matrix_3class.png")
+    # Per-class metrics
+    fig, ax = plt.subplots(figsize=(10, 6))
+    x = np.arange(3)
+    width = 0.25
+    recalls = [report[c]['recall']*100 for c in CONFIG['class_names']]
+    precisions = [report[c]['precision']*100 for c in CONFIG['class_names']]
+    f1s = [report[c]['f1-score']*100 for c in CONFIG['class_names']]
+    bars1 = ax.bar(x - width, recalls, width, label='Recall', color='#e74c3c')
+    bars2 = ax.bar(x, precisions, width, label='Precision', color='#3498db')
+    bars3 = ax.bar(x + width, f1s, width, label='F1-Score', color='#2ecc71')
+    ax.set_ylabel('Score (%)')
+    ax.set_title('Per-Class Metrics (3-Class Model)')
+    ax.set_xticks(x)
+    ax.set_xticklabels(['🔴 Negative\n(Needs Attention)', '🟡 Neutral\n(Mixed)', '🟢 Positive\n(Satisfied)'])
+    ax.legend()
+    ax.set_ylim(0, 105)
+    ax.axhline(y=80, color='gray', linestyle='--', alpha=0.5)
+    for bars in [bars1, bars2, bars3]:
+        for bar in bars:
+            height = bar.get_height()
+            ax.annotate(f'{height:.0f}%', xy=(bar.get_x() + bar.get_width()/2, height),
+                        xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=9)
+    plt.tight_layout()
+    plt.savefig('plots/per_class_metrics_3class.png', dpi=150)
+    print("✓ Saved: plots/per_class_metrics_3class.png")
+    # Training history
+    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
+    epochs_range = range(1, len(history['train_loss']) + 1)
+    axes[0].plot(epochs_range, history['train_loss'], 'b-o', label='Train')
+    axes[0].plot(epochs_range, history['val_loss'], 'r-o', label='Val')
+    axes[0].set_xlabel('Epoch')
+    axes[0].set_ylabel('Loss')
+    axes[0].set_title('Training Loss')
+    axes[0].legend()
+    axes[0].grid(True, alpha=0.3)
+    axes[1].plot(epochs_range, history['train_acc'], 'b-o', label='Train')
+    axes[1].plot(epochs_range, history['val_acc'], 'r-o', label='Val')
+    axes[1].set_xlabel('Epoch')
+    axes[1].set_ylabel('Accuracy (%)')
+    axes[1].set_title('Training Accuracy')
+    axes[1].legend()
+    axes[1].grid(True, alpha=0.3)
+    plt.tight_layout()
+    plt.savefig('plots/training_history_3class.png', dpi=150)
+    print("✓ Saved: plots/training_history_3class.png")
+    # ============================================================
+    # SAVE RESULTS
+    # ============================================================
+    results = {
+        'test_accuracy': test_acc,
+        'negative_recall': negative_recall,
+        'negative_precision': negative_precision,
+        'missed_struggling_students': int(false_negatives),
+        'total_negative_cases': int(total_negatives),
+        'low_confidence_predictions': int(low_confidence),
+        'config': CONFIG,
+        'classification_report': report,
+        'training_time_minutes': total_train_time / 60,
+    }
+    torch.save(results, os.path.join(CONFIG['output_dir'], 'results.pt'))
+    with open(os.path.join(CONFIG['output_dir'], 'results.json'), 'w') as f:
+        save_results = {k: v for k, v in results.items() if k not in ['config', 'classification_report']}
+        save_results['per_class_recall'] = {c: report[c]['recall'] for c in CONFIG['class_names']}
+        json.dump(save_results, f, indent=2)
+    # ============================================================
+    # FINAL SUMMARY
+    # ============================================================
+    print()
+    print("=" * 70)
+    print("🎉 TRAINING COMPLETE!")
+    print("=" * 70)
+    print()
+    print(f"  Model saved to: {CONFIG['output_dir']}/")
+    print()
+    print("  RESULTS:")
+    print(f"    Test Accuracy:     {test_acc:.1f}%")
+    print(f"    Negative Recall:   {negative_recall:.1f}% ← Catches {negative_recall:.0f}% of struggling students")
+    print(f"    Negative Precision: {negative_precision:.1f}%")
+    print()
+    print("  PER-CLASS RECALL:")
+    for name in CONFIG['class_names']:
+        recall = report[name]['recall'] * 100
+        emoji = '🔴' if name == 'Negative' else ('🟡' if name == 'Neutral' else '🟢')
+        print(f"    {emoji} {name}: {recall:.1f}%")
+    print()
+    print("=" * 70)
+if __name__ == '__main__':
+    main()

dropout_binaryclass/correlation.py ADDED Viewed

	@@ -0,0 +1,218 @@

+#!/usr/bin/env python
+# coding: utf-8
+"""
+Feature Correlation Analysis
+Helps identify redundant features and features most correlated with Target.
+"""
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+# =============================================================================
+# 1. LOAD DATA
+# =============================================================================
+df = pd.read_csv('data.csv', sep=';')
+df = df[df['Target'] != 'Enrolled']
+df['Target'] = df['Target'].map({'Dropout': 0, 'Graduate': 1})
+print(f"Dataset shape: {df.shape}")
+print(f"Features: {df.shape[1] - 1}")
+# =============================================================================
+# 2. CORRELATION WITH TARGET
+# =============================================================================
+print("\n" + "="*70)
+print("CORRELATION WITH TARGET (Dropout=0, Graduate=1)")
+print("="*70)
+# Calculate correlation with target
+target_corr = df.corr()['Target'].drop('Target').sort_values(key=abs, ascending=False)
+print("\nAll features ranked by absolute correlation with Target:\n")
+for i, (feature, corr) in enumerate(target_corr.items(), 1):
+    strength = "STRONG" if abs(corr) > 0.3 else "MODERATE" if abs(corr) > 0.15 else "WEAK"
+    print(f"{i:2d}. {feature:50s} {corr:+.4f}  [{strength}]")
+# Plot correlation with target
+plt.figure(figsize=(12, 10))
+colors = ['green' if c > 0 else 'red' for c in target_corr.values]
+target_corr.plot(kind='barh', color=colors)
+plt.title('Feature Correlation with Target (Graduate=1)')
+plt.xlabel('Correlation Coefficient')
+plt.axvline(x=0, color='black', linewidth=0.5)
+plt.axvline(x=0.3, color='blue', linestyle='--', alpha=0.5, label='Strong threshold')
+plt.axvline(x=-0.3, color='blue', linestyle='--', alpha=0.5)
+plt.tight_layout()
+plt.savefig('correlation_with_target.png', dpi=150)
+plt.show()
+# =============================================================================
+# 3. FEATURE-TO-FEATURE CORRELATION (Find Redundant Features)
+# =============================================================================
+print("\n" + "="*70)
+print("HIGHLY CORRELATED FEATURE PAIRS (Potential Redundancy)")
+print("="*70)
+# Calculate correlation matrix
+corr_matrix = df.drop('Target', axis=1).corr()
+# Find highly correlated pairs
+high_corr_pairs = []
+threshold = 0.7
+for i in range(len(corr_matrix.columns)):
+    for j in range(i+1, len(corr_matrix.columns)):
+        corr_value = corr_matrix.iloc[i, j]
+        if abs(corr_value) >= threshold:
+            high_corr_pairs.append({
+                'Feature 1': corr_matrix.columns[i],
+                'Feature 2': corr_matrix.columns[j],
+                'Correlation': corr_value
+            })
+high_corr_df = pd.DataFrame(high_corr_pairs).sort_values('Correlation', key=abs, ascending=False)
+print(f"\nFeature pairs with correlation >= {threshold}:\n")
+if len(high_corr_df) > 0:
+    for _, row in high_corr_df.iterrows():
+        print(f"  {row['Correlation']:+.4f}  |  {row['Feature 1']}")
+        print(f"           |  {row['Feature 2']}")
+        print()
+else:
+    print("  No highly correlated pairs found.")
+# =============================================================================
+# 4. CORRELATION HEATMAP
+# =============================================================================
+plt.figure(figsize=(20, 16))
+sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm',
+            center=0, square=True, linewidths=0.5,
+            annot_kws={'size': 6})
+plt.title('Feature Correlation Matrix')
+plt.tight_layout()
+plt.savefig('correlation_matrix.png', dpi=150)
+plt.show()
+# =============================================================================
+# 5. RECOMMENDATIONS FOR FEATURE SELECTION
+# =============================================================================
+print("\n" + "="*70)
+print("FEATURE SELECTION RECOMMENDATIONS")
+print("="*70)
+# Weak correlation with target (candidates for removal)
+weak_threshold = 0.05
+weak_features = target_corr[abs(target_corr) < weak_threshold]
+print(f"\n1. WEAK CORRELATION WITH TARGET (|corr| < {weak_threshold}):")
+print("   Consider removing these - they may not help prediction:\n")
+for feature, corr in weak_features.items():
+    print(f"   - {feature}: {corr:+.4f}")
+# Features to keep (strong correlation)
+strong_threshold = 0.2
+strong_features = target_corr[abs(target_corr) >= strong_threshold]
+print(f"\n2. STRONG CORRELATION WITH TARGET (|corr| >= {strong_threshold}):")
+print("   Keep these - they are predictive:\n")
+for feature, corr in strong_features.items():
+    print(f"   + {feature}: {corr:+.4f}")
+# Redundant features (high correlation with each other)
+print(f"\n3. REDUNDANT FEATURES (correlated with each other >= {threshold}):")
+print("   Consider keeping only one from each pair:\n")
+for _, row in high_corr_df.iterrows():
+    # Suggest keeping the one more correlated with target
+    corr1 = abs(target_corr.get(row['Feature 1'], 0))
+    corr2 = abs(target_corr.get(row['Feature 2'], 0))
+    keep = row['Feature 1'] if corr1 >= corr2 else row['Feature 2']
+    drop = row['Feature 2'] if corr1 >= corr2 else row['Feature 1']
+    print(f"   KEEP: {keep} (target corr: {target_corr.get(keep, 0):+.4f})")
+    print(f"   DROP: {drop} (target corr: {target_corr.get(drop, 0):+.4f})")
+    print()
+# =============================================================================
+# 6. SUGGESTED FEATURES TO DROP
+# =============================================================================
+print("\n" + "="*70)
+print("SUGGESTED FEATURES TO DROP")
+print("="*70)
+features_to_drop = set()
+# Add weak features
+for f in weak_features.index:
+    features_to_drop.add(f)
+# Add redundant features (the one less correlated with target)
+for _, row in high_corr_df.iterrows():
+    corr1 = abs(target_corr.get(row['Feature 1'], 0))
+    corr2 = abs(target_corr.get(row['Feature 2'], 0))
+    drop = row['Feature 2'] if corr1 >= corr2 else row['Feature 1']
+    features_to_drop.add(drop)
+print(f"\nBased on analysis, consider dropping these {len(features_to_drop)} features:\n")
+for f in features_to_drop:
+    reason = []
+    if f in weak_features.index:
+        reason.append(f"weak target corr ({target_corr[f]:+.4f})")
+    if f in [row['Feature 1'] for _, row in high_corr_df.iterrows()] or \
+       f in [row['Feature 2'] for _, row in high_corr_df.iterrows()]:
+        reason.append("redundant with another feature")
+    print(f"  - {f}")
+    print(f"    Reason: {', '.join(reason)}")
+# Features to keep
+features_to_keep = [f for f in target_corr.index if f not in features_to_drop]
+print(f"\nKeep these {len(features_to_keep)} features:\n")
+for f in features_to_keep:
+    print(f"  + {f} (target corr: {target_corr[f]:+.4f})")
+# =============================================================================
+# 7. GENERATE CODE SNIPPET
+# =============================================================================
+print("\n" + "="*70)
+print("CODE SNIPPET FOR YOUR TRAINING SCRIPT")
+print("="*70)
+print("\n# Copy this to your training script:")
+print(f"columns_to_drop = {list(features_to_drop)}")
+# =============================================================================
+# 8. SAVE ANALYSIS RESULTS
+# =============================================================================
+# Save correlation with target
+target_corr.to_csv('target_correlations.csv', header=['correlation'])
+# Save high correlation pairs
+if len(high_corr_df) > 0:
+    high_corr_df.to_csv('redundant_feature_pairs.csv', index=False)
+# Save recommendations
+with open('feature_selection_recommendations.txt', 'w') as f:
+    f.write("FEATURE SELECTION RECOMMENDATIONS\n")
+    f.write("="*50 + "\n\n")
+    f.write(f"Features to DROP ({len(features_to_drop)}):\n")
+    for feat in features_to_drop:
+        f.write(f"  - {feat}\n")
+    f.write(f"\nFeatures to KEEP ({len(features_to_keep)}):\n")
+    for feat in features_to_keep:
+        f.write(f"  + {feat}\n")
+print("\nFiles saved:")
+print("  1. correlation_with_target.png")
+print("  2. correlation_matrix.png")
+print("  3. target_correlations.csv")
+print("  4. redundant_feature_pairs.csv")
+print("  5. feature_selection_recommendations.txt")

dropout_binaryclass/data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

dropout_binaryclass/feature_importance.png ADDED Viewed

dropout_binaryclass/feature_selection_recommendations.txt ADDED Viewed

	@@ -0,0 +1,42 @@

+FEATURE SELECTION RECOMMENDATIONS
+==================================================
+Features to DROP (17):
+  - Curricular units 2nd sem (enrolled)
+  - Curricular units 1st sem (credited)
+  - Nacionality
+  - Mother's occupation
+  - Curricular units 1st sem (approved)
+  - Educational special needs
+  - Inflation rate
+  - International
+  - Curricular units 2nd sem (credited)
+  - Curricular units 2nd sem (grade)
+  - Course
+  - Curricular units 1st sem (enrolled)
+  - Father's occupation
+  - Curricular units 1st sem (evaluations)
+  - Curricular units 1st sem (grade)
+  - Father's qualification
+  - Unemployment rate
+Features to KEEP (19):
+  + Curricular units 2nd sem (approved)
+  + Tuition fees up to date
+  + Scholarship holder
+  + Age at enrollment
+  + Debtor
+  + Gender
+  + Application mode
+  + Admission grade
+  + Displaced
+  + Curricular units 2nd sem (evaluations)
+  + Previous qualification (grade)
+  + Curricular units 2nd sem (without evaluations)
+  + Marital status
+  + Application order
+  + Daytime/evening attendance
+  + Curricular units 1st sem (without evaluations)
+  + Previous qualification
+  + Mother's qualification
+  + GDP

dropout_binaryclass/model_config.json ADDED Viewed

	@@ -0,0 +1,411 @@

+{
+  "model_name": "Student Dropout Prediction Model",
+  "model_type": "LogisticRegression with StandardScaler",
+  "target_mapping": {
+    "0": "Dropout",
+    "1": "Graduate"
+  },
+  "features": [
+    "Marital status",
+    "Application mode",
+    "Application order",
+    "Course",
+    "Daytime/evening attendance\t",
+    "Previous qualification",
+    "Previous qualification (grade)",
+    "Nacionality",
+    "Mother's qualification",
+    "Father's qualification",
+    "Mother's occupation",
+    "Admission grade",
+    "Displaced",
+    "Educational special needs",
+    "Debtor",
+    "Tuition fees up to date",
+    "Gender",
+    "Scholarship holder",
+    "Age at enrollment",
+    "International",
+    "Curricular units 1st sem (credited)",
+    "Curricular units 1st sem (enrolled)",
+    "Curricular units 1st sem (evaluations)",
+    "Curricular units 1st sem (approved)",
+    "Curricular units 1st sem (grade)",
+    "Curricular units 1st sem (without evaluations)",
+    "Curricular units 2nd sem (evaluations)",
+    "Curricular units 2nd sem (grade)",
+    "Curricular units 2nd sem (without evaluations)",
+    "Unemployment rate",
+    "Inflation rate",
+    "GDP"
+  ],
+  "num_features": 32,
+  "dropped_columns": [
+    "Father's occupation",
+    "Curricular units 2nd sem (credited)",
+    "Curricular units 2nd sem (enrolled)",
+    "Curricular units 2nd sem (approved)"
+  ],
+  "feature_details": {
+    "Marital status": {
+      "dtype": "int64",
+      "min": 1.0,
+      "max": 6.0,
+      "mean": 1.184297520661157,
+      "example_value": 1
+    },
+    "Application mode": {
+      "dtype": "int64",
+      "min": 1.0,
+      "max": 57.0,
+      "mean": 18.421763085399448,
+      "example_value": 17
+    },
+    "Application order": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 6.0,
+      "mean": 1.750137741046832,
+      "example_value": 5
+    },
+    "Course": {
+      "dtype": "int64",
+      "min": 33.0,
+      "max": 9991.0,
+      "mean": 8853.980991735538,
+      "example_value": 171
+    },
+    "Daytime/evening attendance\t": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 1.0,
+      "mean": 0.8876033057851239,
+      "example_value": 1
+    },
+    "Previous qualification": {
+      "dtype": "int64",
+      "min": 1.0,
+      "max": 43.0,
+      "mean": 4.532231404958678,
+      "example_value": 1
+    },
+    "Previous qualification (grade)": {
+      "dtype": "int64",
+      "min": 95.0,
+      "max": 190.0,
+      "mean": 132.90881542699725,
+      "example_value": 122
+    },
+    "Nacionality": {
+      "dtype": "int64",
+      "min": 1.0,
+      "max": 109.0,
+      "mean": 1.828099173553719,
+      "example_value": 1
+    },
+    "Mother's qualification": {
+      "dtype": "int64",
+      "min": 1.0,
+      "max": 44.0,
+      "mean": 19.986225895316803,
+      "example_value": 19
+    },
+    "Father's qualification": {
+      "dtype": "int64",
+      "min": 1.0,
+      "max": 44.0,
+      "mean": 22.57162534435262,
+      "example_value": 12
+    },
+    "Mother's occupation": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 194.0,
+      "mean": 10.138567493112948,
+      "example_value": 5
+    },
+    "Admission grade": {
+      "dtype": "int64",
+      "min": 95.0,
+      "max": 190.0,
+      "mean": 127.28870523415978,
+      "example_value": 127
+    },
+    "Displaced": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 1.0,
+      "mean": 0.5490358126721763,
+      "example_value": 1
+    },
+    "Educational special needs": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 1.0,
+      "mean": 0.011019283746556474,
+      "example_value": 0
+    },
+    "Debtor": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 1.0,
+      "mean": 0.1137741046831956,
+      "example_value": 0
+    },
+    "Tuition fees up to date": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 1.0,
+      "mean": 0.8661157024793389,
+      "example_value": 1
+    },
+    "Gender": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 1.0,
+      "mean": 0.3440771349862259,
+      "example_value": 1
+    },
+    "Scholarship holder": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 1.0,
+      "mean": 0.26694214876033057,
+      "example_value": 0
+    },
+    "Age at enrollment": {
+      "dtype": "int64",
+      "min": 17.0,
+      "max": 70.0,
+      "mean": 23.461157024793387,
+      "example_value": 20
+    },
+    "International": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 1.0,
+      "mean": 0.023691460055096418,
+      "example_value": 0
+    },
+    "Curricular units 1st sem (credited)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 20.0,
+      "mean": 0.7542699724517906,
+      "example_value": 0
+    },
+    "Curricular units 1st sem (enrolled)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 26.0,
+      "mean": 6.337465564738292,
+      "example_value": 0
+    },
+    "Curricular units 1st sem (evaluations)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 45.0,
+      "mean": 8.071074380165289,
+      "example_value": 0
+    },
+    "Curricular units 1st sem (approved)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 26.0,
+      "mean": 4.791460055096419,
+      "example_value": 0
+    },
+    "Curricular units 1st sem (grade)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 19.0,
+      "mean": 10.539118457300276,
+      "example_value": 0
+    },
+    "Curricular units 1st sem (without evaluations)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 12.0,
+      "mean": 0.12892561983471074,
+      "example_value": 0
+    },
+    "Curricular units 2nd sem (evaluations)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 33.0,
+      "mean": 7.763085399449036,
+      "example_value": 0
+    },
+    "Curricular units 2nd sem (grade)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 19.0,
+      "mean": 10.038842975206611,
+      "example_value": 0
+    },
+    "Curricular units 2nd sem (without evaluations)": {
+      "dtype": "int64",
+      "min": 0.0,
+      "max": 12.0,
+      "mean": 0.14214876033057852,
+      "example_value": 0
+    },
+    "Unemployment rate": {
+      "dtype": "int64",
+      "min": 8.0,
+      "max": 16.0,
+      "mean": 11.682920110192837,
+      "example_value": 11
+    },
+    "Inflation rate": {
+      "dtype": "int64",
+      "min": -1.0,
+      "max": 4.0,
+      "mean": 1.215702479338843,
+      "example_value": 1
+    },
+    "GDP": {
+      "dtype": "int64",
+      "min": -4.0,
+      "max": 4.0,
+      "mean": 0.0418732782369146,
+      "example_value": 2
+    }
+  },
+  "model_performance": {
+    "avg_roc_auc": 0.9426,
+    "std_roc_auc": 0.0022,
+    "avg_accuracy": 0.8904,
+    "std_accuracy": 0.0123
+  },
+  "feature_importance": [
+    {
+      "feature": "Curricular units 1st sem (approved)",
+      "coefficient": 3.3163108538242474
+    },
+    {
+      "feature": "Curricular units 2nd sem (grade)",
+      "coefficient": 1.5439405534216617
+    },
+    {
+      "feature": "Curricular units 1st sem (enrolled)",
+      "coefficient": -1.1411938218498847
+    },
+    {
+      "feature": "Tuition fees up to date",
+      "coefficient": 0.9630826567928356
+    },
+    {
+      "feature": "Curricular units 1st sem (credited)",
+      "coefficient": -0.8539015768167176
+    },
+    {
+      "feature": "Curricular units 2nd sem (evaluations)",
+      "coefficient": -0.6369395746417482
+    },
+    {
+      "feature": "Course",
+      "coefficient": -0.6055334597267776
+    },
+    {
+      "feature": "International",
+      "coefficient": 0.4993629811863151
+    },
+    {
+      "feature": "Curricular units 1st sem (grade)",
+      "coefficient": -0.4580579977450427
+    },
+    {
+      "feature": "Debtor",
+      "coefficient": -0.3870319293027283
+    },
+    {
+      "feature": "Nacionality",
+      "coefficient": -0.36386269065696214
+    },
+    {
+      "feature": "Scholarship holder",
+      "coefficient": 0.3601197899922311
+    },
+    {
+      "feature": "Age at enrollment",
+      "coefficient": -0.29681419535938647
+    },
+    {
+      "feature": "Gender",
+      "coefficient": -0.22961088968596147
+    },
+    {
+      "feature": "Mother's occupation",
+      "coefficient": 0.20867097544620444
+    },
+    {
+      "feature": "Displaced",
+      "coefficient": -0.19965059186513248
+    },
+    {
+      "feature": "Curricular units 1st sem (without evaluations)",
+      "coefficient": 0.1878768453143166
+    },
+    {
+      "feature": "Previous qualification",
+      "coefficient": 0.1635268539723628
+    },
+    {
+      "feature": "Application mode",
+      "coefficient": -0.13952867123465623
+    },
+    {
+      "feature": "Curricular units 1st sem (evaluations)",
+      "coefficient": 0.13005849075063863
+    },
+    {
+      "feature": "Unemployment rate",
+      "coefficient": -0.12395327972323616
+    },
+    {
+      "feature": "Curricular units 2nd sem (without evaluations)",
+      "coefficient": 0.11533489424236375
+    },
+    {
+      "feature": "Father's qualification",
+      "coefficient": 0.10277051413826378
+    },
+    {
+      "feature": "GDP",
+      "coefficient": -0.09145115697113011
+    },
+    {
+      "feature": "Daytime/evening attendance\t",
+      "coefficient": -0.08582769046990661
+    },
+    {
+      "feature": "Marital status",
+      "coefficient": 0.07586210175822407
+    },
+    {
+      "feature": "Previous qualification (grade)",
+      "coefficient": -0.07382604570456465
+    },
+    {
+      "feature": "Admission grade",
+      "coefficient": 0.06636622661157908
+    },
+    {
+      "feature": "Mother's qualification",
+      "coefficient": -0.05960602912137761
+    },
+    {
+      "feature": "Application order",
+      "coefficient": -0.02756430990311611
+    },
+    {
+      "feature": "Inflation rate",
+      "coefficient": 0.0016776856356872146
+    },
+    {
+      "feature": "Educational special needs",
+      "coefficient": -0.0004318043811183271
+    }
+  ]
+}

dropout_binaryclass/predict_students_dropout_and_academic_success_model.pkl ADDED Viewed

File without changes

dropout_binaryclass/redundant_feature_pairs.csv ADDED Viewed

	@@ -0,0 +1,16 @@

+Feature 1,Feature 2,Correlation
+Curricular units 1st sem (credited),Curricular units 2nd sem (credited),0.9470934915899273
+Curricular units 1st sem (enrolled),Curricular units 2nd sem (enrolled),0.9412864966294326
+Curricular units 1st sem (approved),Curricular units 2nd sem (approved),0.9163339784914017
+Mother's occupation,Father's occupation,0.8865682817307416
+Curricular units 1st sem (grade),Curricular units 2nd sem (grade),0.8458637025340845
+Nacionality,International,0.7973873767851265
+Curricular units 1st sem (evaluations),Curricular units 2nd sem (evaluations),0.7906158307754103
+Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),0.7868376275910449
+Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),0.7828630989223708
+Curricular units 1st sem (enrolled),Curricular units 1st sem (approved),0.7735791213004372
+Curricular units 1st sem (enrolled),Curricular units 2nd sem (credited),0.7632761218093532
+Curricular units 1st sem (approved),Curricular units 2nd sem (enrolled),0.7373747998128278
+Curricular units 1st sem (approved),Curricular units 1st sem (grade),0.7101565018864167
+Curricular units 1st sem (approved),Curricular units 2nd sem (grade),0.7093678199762506
+Curricular units 2nd sem (enrolled),Curricular units 2nd sem (approved),0.7044445310875675

dropout_binaryclass/target_correlations.csv ADDED Viewed

	@@ -0,0 +1,37 @@

+,correlation
+Curricular units 2nd sem (approved),0.6539952460991423
+Curricular units 2nd sem (grade),0.6053501259229878
+Curricular units 1st sem (approved),0.554880856533347
+Curricular units 1st sem (grade),0.5199270935327744
+Tuition fees up to date,0.4421375757680648
+Scholarship holder,0.313017662589069
+Age at enrollment,-0.2672293831633241
+Debtor,-0.26720719892947853
+Gender,-0.2519548119534265
+Application mode,-0.24450719808426288
+Curricular units 2nd sem (enrolled),0.18289654087432544
+Curricular units 1st sem (enrolled),0.1610735163889365
+Admission grade,0.128057716154513
+Displaced,0.12611303526795542
+Curricular units 2nd sem (evaluations),0.11923876678096997
+Previous qualification (grade),0.10946365310011318
+Curricular units 2nd sem (without evaluations),-0.1026868285766343
+Marital status,-0.10047906625607986
+Application order,0.09435462724757428
+Daytime/evening attendance	,0.08449593574263146
+Curricular units 1st sem (without evaluations),-0.07464226018538014
+Previous qualification,-0.06232290259631596
+Curricular units 1st sem (evaluations),0.05978625949022733
+Mother's qualification,-0.053988794962507865
+Curricular units 2nd sem (credited),0.052401971159116184
+GDP,0.05026014681835994
+Curricular units 1st sem (credited),0.04690001650294807
+Course,0.038135402995266764
+Inflation rate,-0.030325865974636136
+Nacionality,-0.015516308396310501
+Educational special needs,-0.007253654142177353
+International,0.006181262165854279
+Father's qualification,-0.005865479932260606
+Father's occupation,0.005065525427310094
+Unemployment rate,0.0041981052265261075
+Mother's occupation,0.0007724443649592459

dropout_binaryclass/train.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

dropout_binaryclass/train.py ADDED Viewed

	@@ -0,0 +1,224 @@

+#!/usr/bin/env python
+# coding: utf-8
+"""
+Student Dropout Prediction Model
+Trains a Logistic Regression model and saves it with feature configuration.
+"""
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import json
+import joblib
+from sklearn.model_selection import StratifiedKFold
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import roc_auc_score, classification_report, accuracy_score
+# =============================================================================
+# 1. LOAD AND PREPROCESS DATA
+# =============================================================================
+# Load data
+df = pd.read_csv('data.csv', sep=';')
+print(f"Original dataset shape: {df.shape}")
+# Filter out 'Enrolled' - keep only Dropout and Graduate
+df = df[df['Target'] != 'Enrolled']
+print(f"After filtering 'Enrolled': {df.shape}")
+# Round numeric columns
+df = df.round()
+# Convert specific columns to int64
+numeric_cols = [
+    'Admission grade',
+    'Previous qualification (grade)',
+    'Curricular units 1st sem (grade)',
+    'Curricular units 2nd sem (grade)',
+    'Unemployment rate',
+    'Inflation rate',
+    'GDP'
+]
+df[numeric_cols] = df[numeric_cols].astype(np.int64)
+# Drop unnecessary columns (selected by your classmate)
+columns_to_drop = [
+    "Father's occupation",
+    "Curricular units 2nd sem (credited)",
+    "Curricular units 2nd sem (enrolled)",
+    "Curricular units 2nd sem (approved)"
+]
+df.drop(columns=columns_to_drop, inplace=True)
+# Transform Target column
+df['Target'] = df['Target'].map({'Dropout': 0, 'Graduate': 1})
+# Verify target transformation
+print(f"\nTarget distribution:")
+print(df['Target'].value_counts())
+# Create features and target
+x = df.drop('Target', axis=1)
+y = df['Target'].astype(int)
+print(f"\nFeatures shape: {x.shape}")
+print(f"Target shape: {y.shape}")
+# =============================================================================
+# 2. DEFINE MODEL
+# =============================================================================
+# Two-step pipeline: features are standardized by StandardScaler and then fed
+# to an L2-penalized logistic regression. Because scaling lives inside the
+# pipeline, every call to model.fit() re-fits the scaler on the data it is given.
+model = Pipeline([
+    ('scaler', StandardScaler()),
+    ('clf', LogisticRegression(
+        penalty='l2',
+        C=1.0,
+        solver='lbfgs',
+        class_weight='balanced',
+        random_state=42,
+        max_iter=1000
+    ))
+])
+# =============================================================================
+# 3. CROSS-VALIDATION
+# =============================================================================
+print("\n" + "="*60)
+print("CROSS-VALIDATION RESULTS")
+print("="*60)
+# 5-fold stratified split with a fixed seed so the folds are reproducible.
+skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
+# Per-fold metrics; they are summarized below and stored in the saved config later.
+auc_roc_scores = []
+acc_scores = []
+for fold, (train_index, val_index) in enumerate(skf.split(x, y), 1):
+    # skf.split yields positional indices, hence .iloc
+    x_train, x_val = x.iloc[train_index], x.iloc[val_index]
+    y_train, y_val = y.iloc[train_index], y.iloc[val_index]
+    # The same pipeline object is re-fitted on this fold's training rows.
+    model.fit(x_train, y_train)
+    y_pred = model.predict(x_val)
+    # Column 1 of predict_proba is the probability of class 1 ('Graduate' in the target mapping).
+    y_pred_proba = model.predict_proba(x_val)[:, 1]
+    auc_roc = roc_auc_score(y_val, y_pred_proba)
+    acc = accuracy_score(y_val, y_pred)
+    auc_roc_scores.append(auc_roc)
+    acc_scores.append(acc)
+    print(f"\nFold {fold}:")
+    print(f"  Accuracy: {acc:.4f}, ROC-AUC: {auc_roc:.4f}")
+print("\n" + "-"*60)
+# Mean and standard deviation across the five folds.
+print(f"Average ROC-AUC: {np.mean(auc_roc_scores):.4f} ± {np.std(auc_roc_scores):.4f}")
+print(f"Average Accuracy: {np.mean(acc_scores):.4f} ± {np.std(acc_scores):.4f}")
+# =============================================================================
+# 4. TRAIN FINAL MODEL ON ALL DATA
+# =============================================================================
+print("\n" + "="*60)
+print("TRAINING FINAL MODEL ON ALL DATA")
+print("="*60)
+# Re-fit the pipeline on every row (no hold-out); the cross-validation scores
+# above are the performance estimate for this model.
+final_model = model.fit(x, y)
+print("Final model trained successfully!")
+# =============================================================================
+# 5. FEATURE IMPORTANCE
+# =============================================================================
+# Pull the fitted logistic regression out of the pipeline to read its coefficients.
+classifier = final_model.named_steps['clf']
+# One row per feature, ordered by absolute coefficient (largest magnitude first).
+# The classifier sees standardized inputs, so the coefficients are on that scale.
+feature_importance = pd.DataFrame({
+    'feature': x.columns,
+    'coefficient': classifier.coef_[0]
+}).sort_values('coefficient', key=abs, ascending=False)
+print("\nTop 10 Most Important Features:")
+print(feature_importance.head(10).to_string(index=False))
+# Plot feature importance
+plt.figure(figsize=(10, 6))
+sns.barplot(data=feature_importance.head(10), x='coefficient', y='feature')
+plt.title('Top 10 Feature Importance (Logistic Regression Coefficients)')
+plt.tight_layout()
+# Save before show(): the figure is written to disk first, then displayed.
+plt.savefig('feature_importance.png', dpi=150)
+plt.show()
+# =============================================================================
+# 6. SAVE MODEL AND CONFIGURATION
+# =============================================================================
+print("\n" + "="*60)
+print("SAVING MODEL AND CONFIGURATION")
+print("="*60)
+# Save model using joblib (better for sklearn models).
+# The whole pipeline (scaler + classifier) is persisted as one object.
+model_path = "student_dropout_model.pkl"
+joblib.dump(final_model, model_path)
+print(f"Model saved to: {model_path}")
+# Create and save configuration: a JSON description of the model containing
+# the feature list, dropped columns, CV metrics and coefficient table.
+config = {
+    "model_name": "Student Dropout Prediction Model",
+    "model_type": "LogisticRegression with StandardScaler",
+    "target_mapping": {
+        "0": "Dropout",
+        "1": "Graduate"
+    },
+    "features": x.columns.tolist(),
+    "num_features": len(x.columns),
+    "dropped_columns": columns_to_drop,
+    "feature_details": {},
+    "model_performance": {
+        "avg_roc_auc": round(np.mean(auc_roc_scores), 4),
+        "std_roc_auc": round(np.std(auc_roc_scores), 4),
+        "avg_accuracy": round(np.mean(acc_scores), 4),
+        "std_accuracy": round(np.std(acc_scores), 4)
+    },
+    "feature_importance": feature_importance.to_dict('records')
+}
+# Add feature details (dtype, min, max, etc.)
+# Values are converted to built-in float/int before being stored in the config.
+for col in x.columns:
+    config["feature_details"][col] = {
+        "dtype": str(x[col].dtype),
+        "min": float(x[col].min()),
+        "max": float(x[col].max()),
+        "mean": float(x[col].mean()),
+        # First row's value, kept as int for integer columns and float otherwise.
+        "example_value": int(x[col].iloc[0]) if x[col].dtype in ['int64', 'int32'] else float(x[col].iloc[0])
+    }
+# Save configuration
+config_path = "model_config.json"
+with open(config_path, 'w') as f:
+    json.dump(config, f, indent=2)
+print(f"Configuration saved to: {config_path}")
+# =============================================================================
+# 7. PRINT SUMMARY
+# =============================================================================
+# Console recap: the feature columns used for training, the columns that were
+# dropped, and the three output files written by this script.
+print("\n" + "="*60)
+print("SUMMARY: FEATURES YOUR CLASSMATE SELECTED")
+print("="*60)
+print(f"\nTotal features: {len(x.columns)}")
+print("\nFeature list:")
+# Numbered from 1, right-aligned to two digits.
+for i, col in enumerate(x.columns, 1):
+    print(f"  {i:2d}. {col}")
+print(f"\nDropped columns:")
+for col in columns_to_drop:
+    print(f"  - {col}")
+print("\n" + "="*60)
+print("DONE! Files created:")
+print(f"  1. {model_path} (trained model)")
+print(f"  2. {config_path} (feature configuration)")
+print(f"  3. feature_importance.png (visualization)")
+print("="*60)

grade_multiclass/02_grade_distribution.png ADDED Viewed

grade_multiclass/03_performance_index_distribution.png ADDED Viewed

grade_multiclass/04_features_by_grade.png ADDED Viewed

grade_multiclass/05_extracurricular_analysis.png ADDED Viewed

grade_multiclass/06_correlation_heatmap.png ADDED Viewed

grade_multiclass/09_feature_importance.png ADDED Viewed

grade_multiclass/10_learning_curves.png ADDED Viewed

grade_multiclass/11_model_comparison.png ADDED Viewed

grade_multiclass/Student_Performance.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

grade_multiclass/correlation_heatmap.png ADDED Viewed

grade_multiclass/feature_importance.png ADDED Viewed

grade_multiclass/features_by_grade.png ADDED Viewed

grade_multiclass/learning_curves.png ADDED Viewed

grade_multiclass/model_comparison.png ADDED Viewed

grade_multiclass/student_performance_classification.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

grade_multiclass/student_performance_classification.py ADDED Viewed

	@@ -0,0 +1,1100 @@

+#!/usr/bin/env python
+# coding: utf-8
+"""
+Student Performance Multi-Class Classification
+==============================================
+Predicting student grades from study habits, historical performance,
+and lifestyle factors.
+Dataset: 10,000 student records with 5 features
+Target: Performance Index → Converted to letter grades (A/B/C/D/F)
+"""
+# =============================================================================
+# 1. IMPORTS AND CONFIGURATION
+# =============================================================================
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import joblib
+import warnings
+from pathlib import Path
+from sklearn.model_selection import (
+    train_test_split,
+    cross_val_score,
+    StratifiedKFold,
+    GridSearchCV,
+    learning_curve
+)
+from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+from sklearn.metrics import (
+    classification_report,
+    confusion_matrix,
+    ConfusionMatrixDisplay,
+    accuracy_score,
+    f1_score
+)
+from sklearn.utils.class_weight import compute_class_weight
+# Configuration
+# Suppress all warnings for the rest of the run (no category filter is given).
+warnings.filterwarnings('ignore')
+# Global plot styling and default figure size.
+sns.set_theme(style="whitegrid", palette="muted")
+plt.rcParams["figure.figsize"] = (10, 6)
+# Shared seed and fold count used by the sampling, splitting and model code below.
+RANDOM_STATE = 42
+CV_FOLDS = 5
+print("=" * 60)
+print("   STUDENT PERFORMANCE CLASSIFICATION")
+print("   Multi-Class Grade Prediction from Academic Factors")
+print("=" * 60)
+# =============================================================================
+# 2. DATA LOADING AND INITIAL INSPECTION
+# =============================================================================
+def load_and_inspect_data(filepath: str) -> pd.DataFrame:
+    """Load dataset and perform initial inspection."""
+    df = pd.read_csv(filepath)
+    print("\n📊 DATASET OVERVIEW")
+    print("-" * 40)
+    print(f"Shape: {df.shape[0]:,} rows × {df.shape[1]} columns")
+    print(f"\nColumns: {list(df.columns)}")
+    print(f"\nData Types:\n{df.dtypes}")
+    print(f"\nMissing Values:\n{df.isnull().sum()}")
+    print(f"\nBasic Statistics:\n{df.describe()}")
+    # Check categorical column
+    print(f"\nExtracurricular Activities Distribution:")
+    print(df['Extracurricular Activities'].value_counts())
+    return df
+# Load data (path is relative to the current working directory)
+df = load_and_inspect_data('Student_Performance.csv')
+# Preview of the first rows, in addition to the overview printed by the loader.
+print("\nFirst 10 rows:")
+print(df.head(10))
+# =============================================================================
+# 3. TARGET VARIABLE CREATION
+# =============================================================================
+def create_grade_labels(performance_index: pd.Series) -> pd.Series:
+    """
+    Convert continuous Performance Index to letter grades.
+    Grading Scale:
+        A: 90-100
+        B: 80-89
+        C: 70-79
+        D: 60-69
+        F: 0-59
+    """
+    bins = [0, 60, 70, 80, 90, 101]
+    labels = ['F', 'D', 'C', 'B', 'A']
+    grades = pd.cut(
+        performance_index,
+        bins=bins,
+        labels=labels,
+        right=False,
+        include_lowest=True
+    )
+    return grades
+# Create target variable: letter grade derived from the numeric Performance Index
+df['grade'] = create_grade_labels(df['Performance Index'])
+print("\n🎯 TARGET VARIABLE CREATED")
+print("-" * 40)
+print("Grade Distribution:")
+grade_counts = df['grade'].value_counts().sort_index()
+# One line per grade: count, percentage of all rows, and a text bar
+# (one block character per 2 percentage points).
+for grade in ['A', 'B', 'C', 'D', 'F']:
+    count = grade_counts.get(grade, 0)
+    pct = count / len(df) * 100
+    bar = "█" * int(pct / 2)
+    print(f"  {grade}: {count:>5} ({pct:>5.2f}%) {bar}")
+# Check imbalance: ratio of the most frequent grade count to the least frequent one.
+# NOTE(review): a grade with zero rows would make the denominator 0 - confirm the
+# data always contains every grade.
+imbalance_ratio = grade_counts.max() / grade_counts.min()
+print(f"\nImbalance Ratio: {imbalance_ratio:.2f}")
+if imbalance_ratio > 10:
+    print("⚠️  Significant imbalance - will use class weights")
+else:
+    print("✅ Classes are reasonably balanced")
+# =============================================================================
+# 4. EXPLORATORY DATA ANALYSIS
+# =============================================================================
+def perform_eda(df: pd.DataFrame):
+    """Comprehensive exploratory data analysis.
+
+    Produces seven figures (saved as 01_... through 07_... PNG files in the
+    current directory and also shown with plt.show()) and prints the
+    correlation of each feature with 'Performance Index'.
+
+    Args:
+        df: Frame containing the four numerical feature columns listed below,
+            'Extracurricular Activities', 'Performance Index' and 'grade'.
+
+    Returns:
+        None. The input frame is not modified (the correlation step works on a copy).
+    """
+    print("\n📈 EXPLORATORY DATA ANALYSIS")
+    print("=" * 60)
+    # Define feature groups
+    numerical_features = [
+        'Hours Studied',
+        'Previous Scores',
+        'Sleep Hours',
+        'Sample Question Papers Practiced'
+    ]
+    # Not referenced again in this function.
+    categorical_features = ['Extracurricular Activities']
+    # 4.1 Numerical Feature Distributions
+    # 2x2 grid: one histogram + KDE per numerical feature, with mean/median markers.
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+    axes = axes.flatten()
+    for i, col in enumerate(numerical_features):
+        sns.histplot(df[col], kde=True, ax=axes[i], color='teal', bins=30)
+        axes[i].axvline(df[col].mean(), color='red', linestyle='--',
+                       label=f'Mean: {df[col].mean():.1f}')
+        axes[i].axvline(df[col].median(), color='orange', linestyle='--',
+                       label=f'Median: {df[col].median():.1f}')
+        axes[i].set_title(f'Distribution of {col}')
+        axes[i].legend()
+    plt.tight_layout()
+    plt.savefig('01_feature_distributions.png', dpi=150, bbox_inches='tight')
+    plt.show()
+    # 4.2 Target Distribution
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    grade_order = ['A', 'B', 'C', 'D', 'F']
+    # reindex fixes the A..F display order regardless of how value_counts sorts.
+    grade_counts = df['grade'].value_counts().reindex(grade_order)
+    colors = sns.color_palette('RdYlGn_r', 5)
+    # Bar chart
+    bars = axes[0].bar(grade_order, grade_counts.values, color=colors)
+    axes[0].set_title('Grade Distribution', fontsize=14)
+    axes[0].set_xlabel('Grade')
+    axes[0].set_ylabel('Count')
+    # Write each count just above its bar (fixed offset of 20 in data units).
+    for bar, count in zip(bars, grade_counts.values):
+        axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 20,
+                    f'{count}', ha='center', fontsize=11)
+    # Pie chart
+    axes[1].pie(grade_counts, labels=grade_order, autopct='%1.1f%%',
+                colors=colors, explode=[0.02]*5)
+    axes[1].set_title('Grade Distribution (%)', fontsize=14)
+    plt.tight_layout()
+    plt.savefig('02_grade_distribution.png', dpi=150, bbox_inches='tight')
+    plt.show()
+    # 4.3 Performance Index Distribution (before binning)
+    plt.figure(figsize=(12, 5))
+    sns.histplot(df['Performance Index'], kde=True, bins=50, color='steelblue')
+    # Add grade boundary lines
+    boundaries = [60, 70, 80, 90]
+    boundary_labels = ['F/D', 'D/C', 'C/B', 'B/A']
+    for bound, label in zip(boundaries, boundary_labels):
+        plt.axvline(bound, color='red', linestyle='--', alpha=0.7)
+        # Label sits slightly right of the line at 90% of the current y-axis upper limit.
+        plt.text(bound + 1, plt.gca().get_ylim()[1] * 0.9, label, fontsize=10)
+    plt.title('Performance Index Distribution with Grade Boundaries')
+    plt.xlabel('Performance Index')
+    plt.savefig('03_performance_index_distribution.png', dpi=150, bbox_inches='tight')
+    plt.show()
+    # 4.4 Features by Grade (Box Plots)
+    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
+    axes = axes.flatten()
+    for i, col in enumerate(numerical_features):
+        sns.boxplot(data=df, x='grade', y=col, order=grade_order,
+                    hue='grade', palette='RdYlGn_r', legend=False, ax=axes[i])
+        axes[i].set_title(f'{col} by Grade')
+    plt.tight_layout()
+    plt.savefig('04_features_by_grade.png', dpi=150, bbox_inches='tight')
+    plt.show()
+    # 4.5 Extracurricular Activities Analysis
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    # Grade distribution by extracurricular
+    # normalize='index' makes each row sum to 1; * 100 converts to percentages.
+    ct = pd.crosstab(df['Extracurricular Activities'], df['grade'], normalize='index') * 100
+    ct = ct[grade_order]
+    ct.plot(kind='bar', ax=axes[0], color=colors, edgecolor='black')
+    axes[0].set_title('Grade Distribution by Extracurricular Activities')
+    axes[0].set_ylabel('Percentage')
+    # Tick labels are hard-coded; assumes the crosstab rows come out in the
+    # order No, Yes - TODO confirm against the data.
+    axes[0].set_xticklabels(['No', 'Yes'], rotation=0)
+    axes[0].legend(title='Grade', bbox_to_anchor=(1.02, 1))
+    # Performance Index by extracurricular
+    sns.boxplot(data=df, x='Extracurricular Activities', y='Performance Index',
+                hue='Extracurricular Activities', palette='Set2', legend=False, ax=axes[1])
+    axes[1].set_title('Performance Index by Extracurricular Activities')
+    plt.tight_layout()
+    plt.savefig('05_extracurricular_analysis.png', dpi=150, bbox_inches='tight')
+    plt.show()
+    # 4.6 Correlation Analysis
+    plt.figure(figsize=(10, 8))
+    # Create correlation matrix (encode extracurricular for correlation)
+    # Work on a copy so the encoded helper column is not added to the caller's frame.
+    df_corr = df.copy()
+    df_corr['Extracurricular (encoded)'] = (df_corr['Extracurricular Activities'] == 'Yes').astype(int)
+    corr_cols = numerical_features + ['Extracurricular (encoded)', 'Performance Index']
+    corr_matrix = df_corr[corr_cols].corr()
+    # Mask the upper triangle (diagonal included) so each pair appears once.
+    mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
+    sns.heatmap(corr_matrix, annot=True, cmap='RdBu_r', center=0,
+                mask=mask, square=True, linewidths=0.5, fmt='.2f')
+    plt.title('Feature Correlation Heatmap')
+    plt.tight_layout()
+    plt.savefig('06_correlation_heatmap.png', dpi=150, bbox_inches='tight')
+    plt.show()
+    # 4.7 Pairplot for key relationships
+    print("\nGenerating pairplot (this may take a moment)...")
+    key_features = ['Hours Studied', 'Previous Scores', 'Performance Index']
+    # Plot at most 2000 randomly sampled rows (seeded for reproducibility).
+    sample_df = df.sample(n=min(2000, len(df)), random_state=RANDOM_STATE)
+    g = sns.pairplot(sample_df, vars=key_features, hue='grade',
+                     hue_order=grade_order, palette='RdYlGn_r',
+                     diag_kind='kde', plot_kws={'alpha': 0.6})
+    g.fig.suptitle('Feature Relationships by Grade', y=1.02)
+    plt.savefig('07_pairplot.png', dpi=150, bbox_inches='tight')
+    plt.show()
+    # 4.8 Print correlation insights
+    print("\n📊 CORRELATION INSIGHTS")
+    print("-" * 40)
+    perf_corr = corr_matrix['Performance Index'].drop('Performance Index').sort_values(ascending=False)
+    print("Correlation with Performance Index:")
+    for feat, corr in perf_corr.items():
+        # Arrow legend: > 0.5 "↑↑", > 0.3 "↑", > -0.3 "→", otherwise "↓".
+        indicator = "↑↑" if corr > 0.5 else "↑" if corr > 0.3 else "→" if corr > -0.3 else "↓"
+        print(f"  {indicator} {feat}: {corr:.3f}")
+perform_eda(df)
+# =============================================================================
+# 5. DATA PREPROCESSING
+# =============================================================================
+class StudentDataPreprocessor:
+    """Handles all data preprocessing steps."""
+    def __init__(self):
+        self.numerical_features = [
+            'Hours Studied',
+            'Previous Scores',
+            'Sleep Hours',
+            'Sample Question Papers Practiced'
+        ]
+        self.categorical_features = ['Extracurricular Activities']
+        self.all_features = self.numerical_features + self.categorical_features
+        self.scaler = StandardScaler()
+        self.label_encoder = LabelEncoder()
+        self.onehot_encoder = OneHotEncoder(drop='first', sparse_output=False)
+        self.grade_mapping = None
+        self.class_weights = None
+        self.is_fitted = False
+    def fit_transform(self, df: pd.DataFrame):
+        """Fit preprocessors and transform data."""
+        # Extract features
+        X_numerical = df[self.numerical_features].copy()
+        X_categorical = df[self.categorical_features].copy()
+        y = df['grade'].copy()
+        # Encode target
+        y_encoded = self.label_encoder.fit_transform(y)
+        self.grade_mapping = dict(zip(
+            self.label_encoder.classes_,
+            self.label_encoder.transform(self.label_encoder.classes_)
+        ))
+        # Compute class weights
+        classes = np.unique(y_encoded)
+        weights = compute_class_weight('balanced', classes=classes, y=y_encoded)
+        self.class_weights = dict(zip(classes, weights))
+        # Scale numerical features
+        X_numerical_scaled = self.scaler.fit_transform(X_numerical)
+        # Encode categorical features
+        X_categorical_encoded = self.onehot_encoder.fit_transform(X_categorical)
+        # Combine features
+        X_combined = np.hstack([X_numerical_scaled, X_categorical_encoded])
+        # Get feature names for later
+        cat_feature_names = self.onehot_encoder.get_feature_names_out(self.categorical_features)
+        self.feature_names = self.numerical_features + list(cat_feature_names)
+        self.is_fitted = True
+        print("\n🔧 PREPROCESSING COMPLETE")
+        print("-" * 40)
+        print(f"Numerical features: {self.numerical_features}")
+        print(f"Categorical features: {self.categorical_features}")
+        print(f"Total features after encoding: {len(self.feature_names)}")
+        print(f"\nFeature names: {self.feature_names}")
+        print(f"\nTarget Mapping: {self.grade_mapping}")
+        print(f"\nClass Weights:")
+        for cls, weight in self.class_weights.items():
+            grade = self.get_grade_from_encoding(cls)
+            print(f"  {grade}: {weight:.4f}")
+        return X_combined, y_encoded
+    def transform(self, df: pd.DataFrame):
+        """Transform new data using fitted preprocessors."""
+        if not self.is_fitted:
+            raise ValueError("Preprocessor must be fitted before transforming.")
+        X_numerical = df[self.numerical_features].copy()
+        X_categorical = df[self.categorical_features].copy()
+        X_numerical_scaled = self.scaler.transform(X_numerical)
+        X_categorical_encoded = self.onehot_encoder.transform(X_categorical)
+        return np.hstack([X_numerical_scaled, X_categorical_encoded])
+    def transform_single(self, hours_studied, previous_scores, sleep_hours,
+                         sample_papers, extracurricular):
+        """Transform a single sample for prediction."""
+        if not self.is_fitted:
+            raise ValueError("Preprocessor must be fitted before transforming.")
+        df = pd.DataFrame({
+            'Hours Studied': [hours_studied],
+            'Previous Scores': [previous_scores],
+            'Sleep Hours': [sleep_hours],
+            'Sample Question Papers Practiced': [sample_papers],
+            'Extracurricular Activities': [extracurricular]
+        })
+        return self.transform(df)
+    def get_grade_from_encoding(self, encoding: int) -> str:
+        """Get grade letter from numeric encoding."""
+        inv_map = {v: k for k, v in self.grade_mapping.items()}
+        return inv_map[encoding]
+    def save(self, filepath: str):
+        """Save preprocessor to disk."""
+        joblib.dump(self, filepath)
+    @staticmethod
+    def load(filepath: str):
+        """Load preprocessor from disk."""
+        return joblib.load(filepath)
+# Initialize and fit preprocessor
+preprocessor = StudentDataPreprocessor()
+X, y = preprocessor.fit_transform(df)
+# =============================================================================
+# 6. TRAIN/TEST SPLIT
+# =============================================================================
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y,
+    test_size=0.20,
+    random_state=RANDOM_STATE,
+    stratify=y
+)
+print("\n📂 DATA SPLIT")
+print("-" * 40)
+print(f"Training set: {X_train.shape[0]:,} samples ({X_train.shape[0]/len(y)*100:.1f}%)")
+print(f"Testing set:  {X_test.shape[0]:,} samples ({X_test.shape[0]/len(y)*100:.1f}%)")
+print(f"Features: {X_train.shape[1]}")
+print(f"\nTraining set class distribution:")
+unique, counts = np.unique(y_train, return_counts=True)
+for u, c in zip(unique, counts):
+    print(f"  {preprocessor.get_grade_from_encoding(u)}: {c:,} ({c/len(y_train)*100:.1f}%)")
+# =============================================================================
+# 7. MODEL TRAINING WITH CROSS-VALIDATION
+# =============================================================================
+def cross_validate_model(model, X, y, cv_folds: int = 5, model_name: str = "Model"):
+    """Perform cross-validation and return detailed metrics."""
+    cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=RANDOM_STATE)
+    accuracy_scores = cross_val_score(model, X, y, cv=cv, scoring='accuracy', n_jobs=-1)
+    f1_macro_scores = cross_val_score(model, X, y, cv=cv, scoring='f1_macro', n_jobs=-1)
+    f1_weighted_scores = cross_val_score(model, X, y, cv=cv, scoring='f1_weighted', n_jobs=-1)
+    results = {
+        'model_name': model_name,
+        'accuracy_mean': accuracy_scores.mean(),
+        'accuracy_std': accuracy_scores.std(),
+        'f1_macro_mean': f1_macro_scores.mean(),
+        'f1_macro_std': f1_macro_scores.std(),
+        'f1_weighted_mean': f1_weighted_scores.mean(),
+        'f1_weighted_std': f1_weighted_scores.std(),
+    }
+    print(f"\n{model_name} - {cv_folds}-Fold Cross-Validation:")
+    print(f"  Accuracy:    {results['accuracy_mean']:.4f} ± {results['accuracy_std']:.4f}")
+    print(f"  F1 (Macro):  {results['f1_macro_mean']:.4f} ± {results['f1_macro_std']:.4f}")
+    print(f"  F1 (Weight): {results['f1_weighted_mean']:.4f} ± {results['f1_weighted_std']:.4f}")
+    return results
+print("\n🤖 MODEL TRAINING WITH CROSS-VALIDATION")
+print("=" * 60)
+# Define models
+# Three baseline candidates with fixed (untuned) hyperparameters. The logistic
+# regression and random forest are given class_weight='balanced'; the gradient
+# boosting model is constructed without a class-weight argument.
+models = {
+    'Logistic Regression': LogisticRegression(
+        solver='lbfgs',
+        max_iter=1000,
+        random_state=RANDOM_STATE,
+        class_weight='balanced',
+        n_jobs=-1
+    ),
+    'Random Forest': RandomForestClassifier(
+        n_estimators=100,
+        max_depth=15,
+        random_state=RANDOM_STATE,
+        class_weight='balanced',
+        n_jobs=-1
+    ),
+    'Gradient Boosting': GradientBoostingClassifier(
+        n_estimators=100,
+        max_depth=5,
+        random_state=RANDOM_STATE
+    )
+}
+# Cross-validate all models
+# Only the training split is used here; cv_results maps model name -> metrics dict.
+cv_results = {}
+for name, model in models.items():
+    cv_results[name] = cross_validate_model(model, X_train, y_train, CV_FOLDS, name)
+# =============================================================================
+# 8. HYPERPARAMETER TUNING
+# =============================================================================
+print("\n🔍 HYPERPARAMETER TUNING")
+print("=" * 60)
+# Tune Random Forest
+print("\nTuning Random Forest...")
+# 3 * 4 * 3 * 3 = 108 parameter combinations, each evaluated with 3-fold CV.
+rf_param_grid = {
+    'n_estimators': [50, 100, 200],
+    'max_depth': [10, 15, 20, None],
+    'min_samples_split': [2, 5, 10],
+    'min_samples_leaf': [1, 2, 4]
+}
+# Model selection metric is macro F1. n_jobs=-1 is set on both the forest and
+# the grid search.
+rf_grid = GridSearchCV(
+    RandomForestClassifier(random_state=RANDOM_STATE, class_weight='balanced', n_jobs=-1),
+    rf_param_grid,
+    cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE),
+    scoring='f1_macro',
+    n_jobs=-1,
+    verbose=1
+)
+rf_grid.fit(X_train, y_train)
+print(f"\nRandom Forest Best Parameters: {rf_grid.best_params_}")
+print(f"Random Forest Best CV F1 (Macro): {rf_grid.best_score_:.4f}")
+# Tune Gradient Boosting
+print("\nTuning Gradient Boosting...")
+# 3 * 3 * 3 * 2 = 54 parameter combinations, each evaluated with 3-fold CV.
+gb_param_grid = {
+    'n_estimators': [50, 100, 150],
+    'max_depth': [3, 5, 7],
+    'learning_rate': [0.05, 0.1, 0.2],
+    'min_samples_split': [2, 5]
+}
+gb_grid = GridSearchCV(
+    GradientBoostingClassifier(random_state=RANDOM_STATE),
+    gb_param_grid,
+    cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE),
+    scoring='f1_macro',
+    n_jobs=-1,
+    verbose=1
+)
+gb_grid.fit(X_train, y_train)
+print(f"\nGradient Boosting Best Parameters: {gb_grid.best_params_}")
+print(f"Gradient Boosting Best CV F1 (Macro): {gb_grid.best_score_:.4f}")
+# Select best model
+# Only the two tuned models are compared (by best CV macro F1); the logistic
+# regression baseline is not part of this selection.
+best_models = {
+    'Random Forest': (rf_grid.best_estimator_, rf_grid.best_score_),
+    'Gradient Boosting': (gb_grid.best_estimator_, gb_grid.best_score_)
+}
+best_model_name = max(best_models.keys(), key=lambda k: best_models[k][1])
+best_model = best_models[best_model_name][0]
+print(f"\n🏆 Best Model: {best_model_name}")
+# =============================================================================
+# 9. FINAL MODEL EVALUATION
+# =============================================================================
+def comprehensive_evaluation(model, X_test, y_test, preprocessor, model_name: str):
+    """Comprehensive model evaluation with visualizations."""
+    y_pred = model.predict(X_test)
+    y_proba = model.predict_proba(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    f1_macro = f1_score(y_test, y_pred, average='macro')
+    f1_weighted = f1_score(y_test, y_pred, average='weighted')
+    print(f"\n{'='*60}")
+    print(f"📊 {model_name} - TEST SET EVALUATION")
+    print(f"{'='*60}")
+    print(f"\nOverall Metrics:")
+    print(f"  Accuracy:          {accuracy:.4f} ({accuracy*100:.2f}%)")
+    print(f"  F1 Score (Macro):  {f1_macro:.4f}")
+    print(f"  F1 Score (Weight): {f1_weighted:.4f}")
+    print(f"\nDetailed Classification Report:")
+    print(classification_report(
+        y_test, y_pred,
+        target_names=preprocessor.label_encoder.classes_,
+        zero_division=0
+    ))
+    # Confusion Matrices
+    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+    cm = confusion_matrix(y_test, y_pred)
+    disp = ConfusionMatrixDisplay(
+        confusion_matrix=cm,
+        display_labels=preprocessor.label_encoder.classes_
+    )
+    disp.plot(cmap='Blues', ax=axes[0])
+    axes[0].set_title(f'Confusion Matrix - {model_name}')
+    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
+    disp_norm = ConfusionMatrixDisplay(
+        confusion_matrix=cm_normalized,
+        display_labels=preprocessor.label_encoder.classes_
+    )
+    disp_norm.plot(cmap='Blues', ax=axes[1], values_format='.2%')
+    axes[1].set_title(f'Normalized Confusion Matrix - {model_name}')
+    plt.tight_layout()
+    plt.savefig(f'08_confusion_matrix_{model_name.lower().replace(" ", "_")}.png',
+                dpi=150, bbox_inches='tight')
+    plt.show()
+    return {
+        'accuracy': accuracy,
+        'f1_macro': f1_macro,
+        'f1_weighted': f1_weighted,
+        'y_pred': y_pred,
+        'y_proba': y_proba
+    }
+final_results = comprehensive_evaluation(best_model, X_test, y_test, preprocessor, best_model_name)
+# =============================================================================
+# 10. FEATURE IMPORTANCE ANALYSIS
+# =============================================================================
+def plot_feature_importance(model, feature_names: list, model_name: str):
+    """Visualize feature importances."""
+    if hasattr(model, 'feature_importances_'):
+        importances = model.feature_importances_
+    else:
+        print("Model doesn't support feature importance extraction.")
+        return
+    indices = np.argsort(importances)[::-1]
+    print(f"\n📊 Feature Importance - {model_name}")
+    print("-" * 40)
+    for i, idx in enumerate(indices):
+        print(f"  {i+1}. {feature_names[idx]}: {importances[idx]:.4f} ({importances[idx]*100:.1f}%)")
+    plt.figure(figsize=(10, 6))
+    colors = sns.color_palette('viridis', len(feature_names))
+    bars = plt.barh(range(len(indices)), importances[indices], color=colors)
+    plt.yticks(range(len(indices)), [feature_names[i] for i in indices])
+    plt.xlabel('Feature Importance')
+    plt.title(f'Feature Importance - {model_name}')
+    plt.gca().invert_yaxis()
+    for bar, imp in zip(bars, importances[indices]):
+        plt.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height()/2,
+                 f'{imp:.3f}', va='center', fontsize=10)
+    plt.tight_layout()
+    plt.savefig('09_feature_importance.png', dpi=150, bbox_inches='tight')
+    plt.show()
+plot_feature_importance(best_model, preprocessor.feature_names, best_model_name)
+# =============================================================================
+# 11. LEARNING CURVES
+# =============================================================================
+def plot_learning_curves(model, X, y, model_name: str):
+    """Plot learning curves to diagnose bias/variance."""
+    print(f"\nGenerating learning curves for {model_name}...")
+    train_sizes, train_scores, val_scores = learning_curve(
+        model, X, y,
+        train_sizes=np.linspace(0.1, 1.0, 10),
+        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE),
+        scoring='f1_macro',
+        n_jobs=-1
+    )
+    train_mean = train_scores.mean(axis=1)
+    train_std = train_scores.std(axis=1)
+    val_mean = val_scores.mean(axis=1)
+    val_std = val_scores.std(axis=1)
+    plt.figure(figsize=(10, 6))
+    plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std,
+                     alpha=0.1, color='blue')
+    plt.fill_between(train_sizes, val_mean - val_std, val_mean + val_std,
+                     alpha=0.1, color='orange')
+    plt.plot(train_sizes, train_mean, 'o-', color='blue', label='Training Score')
+    plt.plot(train_sizes, val_mean, 'o-', color='orange', label='Validation Score')
+    plt.xlabel('Training Set Size')
+    plt.ylabel('F1 Score (Macro)')
+    plt.title(f'Learning Curves - {model_name}')
+    plt.legend(loc='lower right')
+    plt.grid(True, alpha=0.3)
+    plt.tight_layout()
+    plt.savefig('10_learning_curves.png', dpi=150, bbox_inches='tight')
+    plt.show()
+    final_gap = train_mean[-1] - val_mean[-1]
+    print(f"\n📈 Learning Curve Analysis:")
+    print(f"  Final Training Score:   {train_mean[-1]:.4f}")
+    print(f"  Final Validation Score: {val_mean[-1]:.4f}")
+    print(f"  Gap: {final_gap:.4f}")
+    if final_gap > 0.1:
+        print("  ⚠️ High variance - model may be overfitting")
+    elif val_mean[-1] < 0.6:
+        print("  ⚠️ High bias - model may be underfitting")
+    else:
+        print("  ✅ Model appears well-balanced")
+# Create fresh model for learning curves
+if best_model_name == 'Random Forest':
+    model_for_curves = RandomForestClassifier(**rf_grid.best_params_,
+                                               random_state=RANDOM_STATE,
+                                               class_weight='balanced',
+                                               n_jobs=-1)
+else:
+    model_for_curves = GradientBoostingClassifier(**gb_grid.best_params_,
+                                                   random_state=RANDOM_STATE)
+plot_learning_curves(model_for_curves, X_train, y_train, best_model_name)
+# =============================================================================
+# 12. MODEL COMPARISON SUMMARY
+# =============================================================================
+def create_comparison_summary(cv_results: dict, best_model_name: str, final_accuracy: float):
+    """Create a summary comparison table."""
+    print("\n" + "=" * 60)
+    print("📋 MODEL COMPARISON SUMMARY")
+    print("=" * 60)
+    summary_data = []
+    for name, results in cv_results.items():
+        summary_data.append({
+            'Model': name,
+            'CV Accuracy': f"{results['accuracy_mean']:.4f} ± {results['accuracy_std']:.4f}",
+            'CV F1 (Macro)': f"{results['f1_macro_mean']:.4f} ± {results['f1_macro_std']:.4f}",
+            'CV F1 (Weighted)': f"{results['f1_weighted_mean']:.4f} ± {results['f1_weighted_std']:.4f}"
+        })
+    summary_df = pd.DataFrame(summary_data)
+    print(summary_df.to_string(index=False))
+    # Visualization
+    fig, ax = plt.subplots(figsize=(12, 6))
+    x = np.arange(len(cv_results))
+    width = 0.35
+    accuracies = [r['accuracy_mean'] for r in cv_results.values()]
+    f1_scores = [r['f1_macro_mean'] for r in cv_results.values()]
+    bars1 = ax.bar(x - width/2, accuracies, width, label='Accuracy', color='steelblue')
+    bars2 = ax.bar(x + width/2, f1_scores, width, label='F1 (Macro)', color='darkorange')
+    ax.set_ylabel('Score')
+    ax.set_title('Model Comparison - Cross-Validation Results')
+    ax.set_xticks(x)
+    ax.set_xticklabels(cv_results.keys())
+    ax.legend()
+    ax.set_ylim(0, 1.0)
+    for bar in bars1 + bars2:
+        height = bar.get_height()
+        ax.annotate(f'{height:.3f}', xy=(bar.get_x() + bar.get_width()/2, height),
+                    xytext=(0, 3), textcoords="offset points", ha='center', fontsize=9)
+    plt.tight_layout()
+    plt.savefig('11_model_comparison.png', dpi=150, bbox_inches='tight')
+    plt.show()
+create_comparison_summary(cv_results, best_model_name, final_results['accuracy'])
+# =============================================================================
+# 13. AGENT-READY PREDICTION CLASS
+# =============================================================================
+class StudentGradePredictor:
+    """
+    Production-ready grade prediction class for agent integration.
+    """
+    def __init__(self, model, preprocessor: StudentDataPreprocessor):
+        self.model = model
+        self.preprocessor = preprocessor
+        self.grade_order = ['A', 'B', 'C', 'D', 'F']
+        self.valid_ranges = {
+            'hours_studied': (0, 50),
+            'previous_scores': (0, 100),
+            'sleep_hours': (0, 24),
+            'sample_papers': (0, 20),
+            'extracurricular': ['Yes', 'No']
+        }
+    def validate_input(self, hours_studied, previous_scores, sleep_hours,
+                       sample_papers, extracurricular) -> tuple:
+        """Validate input values."""
+        errors = []
+        # Check numerical ranges
+        checks = [
+            ('hours_studied', hours_studied, self.valid_ranges['hours_studied']),
+            ('previous_scores', previous_scores, self.valid_ranges['previous_scores']),
+            ('sleep_hours', sleep_hours, self.valid_ranges['sleep_hours']),
+            ('sample_papers', sample_papers, self.valid_ranges['sample_papers']),
+        ]
+        for name, value, (min_val, max_val) in checks:
+            if not (min_val <= value <= max_val):
+                errors.append(f"{name} must be between {min_val} and {max_val} (got {value})")
+        # Check categorical
+        if extracurricular not in self.valid_ranges['extracurricular']:
+            errors.append(f"extracurricular must be 'Yes' or 'No' (got {extracurricular})")
+        if errors:
+            return False, "; ".join(errors)
+        return True, "Valid"
+    def predict(self, hours_studied: float, previous_scores: float,
+                sleep_hours: float, sample_papers: int,
+                extracurricular: str) -> dict:
+        """
+        Make a grade prediction with confidence scores.
+        Parameters:
+        -----------
+        hours_studied : float - Total hours spent studying (0-50)
+        previous_scores : float - Previous test scores (0-100)
+        sleep_hours : float - Average daily sleep hours (0-24)
+        sample_papers : int - Number of practice papers completed (0-20)
+        extracurricular : str - Participates in extracurricular activities ('Yes'/'No')
+        Returns:
+        --------
+        dict : Prediction results
+        """
+        # Validate input
+        is_valid, message = self.validate_input(
+            hours_studied, previous_scores, sleep_hours, sample_papers, extracurricular
+        )
+        if not is_valid:
+            return {
+                'success': False,
+                'error': message,
+                'predicted_grade': None,
+                'confidence': None
+            }
+        # Transform input
+        X = self.preprocessor.transform_single(
+            hours_studied, previous_scores, sleep_hours,
+            sample_papers, extracurricular
+        )
+        # Predict
+        prediction = self.model.predict(X)[0]
+        probabilities = self.model.predict_proba(X)[0]
+        predicted_grade = self.preprocessor.get_grade_from_encoding(prediction)
+        confidence = probabilities[prediction]
+        # Probability distribution
+        prob_distribution = {}
+        for i, grade in enumerate(self.preprocessor.label_encoder.classes_):
+            prob_distribution[grade] = round(probabilities[i] * 100, 2)
+        # Generate insights
+        recommendation = self._generate_recommendation(
+            predicted_grade, confidence, hours_studied, previous_scores,
+            sleep_hours, sample_papers, extracurricular
+        )
+        confidence_level = self._get_confidence_level(confidence)
+        return {
+            'success': True,
+            'predicted_grade': predicted_grade,
+            'confidence': round(confidence * 100, 2),
+            'confidence_level': confidence_level,
+            'probability_distribution': prob_distribution,
+            'input_summary': {
+                'hours_studied': hours_studied,
+                'previous_scores': previous_scores,
+                'sleep_hours': sleep_hours,
+                'sample_papers': sample_papers,
+                'extracurricular': extracurricular
+            },
+            'recommendation': recommendation,
+            'disclaimer': (
+                "This prediction is based on statistical patterns and should inform, "
+                "not replace, professional educator judgment."
+            )
+        }
+    def _get_confidence_level(self, confidence: float) -> str:
+        if confidence >= 0.7:
+            return "HIGH"
+        elif confidence >= 0.4:
+            return "MODERATE"
+        else:
+            return "LOW"
+    def _generate_recommendation(self, grade, confidence, hours_studied,
+                                  previous_scores, sleep_hours, sample_papers,
+                                  extracurricular):
+        """Generate actionable recommendations."""
+        recommendations = []
+        if grade in ['D', 'F']:
+            recommendations.append("⚠️ Student may need intervention.")
+            if hours_studied < 5:
+                recommendations.append("📚 Study hours are very low - recommend study plan.")
+            if previous_scores < 60:
+                recommendations.append("📝 Previous performance concerning - consider tutoring.")
+            if sleep_hours < 6:
+                recommendations.append("😴 Sleep deprivation may be affecting performance.")
+            if sample_papers < 2:
+                recommendations.append("📋 More practice tests recommended.")
+        elif grade == 'C':
+            recommendations.append("📊 Average performance - room for improvement.")
+            if hours_studied < 7:
+                recommendations.append("📚 Increasing study hours could help.")
+            if sample_papers < 3:
+                recommendations.append("📋 More practice papers recommended.")
+        elif grade == 'B':
+            recommendations.append("👍 Good performance.")
+            if hours_studied < 8 or sample_papers < 4:
+                recommendations.append("📈 Small improvements could push to A grade.")
+        else:  # A
+            recommendations.append("🌟 Excellent! Student is performing very well.")
+        if confidence < 0.4:
+            recommendations.append("⚡ Low confidence - consider additional assessment.")
+        return " ".join(recommendations)
+    def predict_batch(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Make predictions for multiple students."""
+        results = []
+        for _, row in df.iterrows():
+            result = self.predict(
+                row['Hours Studied'],
+                row['Previous Scores'],
+                row['Sleep Hours'],
+                row['Sample Question Papers Practiced'],
+                row['Extracurricular Activities']
+            )
+            results.append({
+                'predicted_grade': result.get('predicted_grade'),
+                'confidence': result.get('confidence'),
+                'confidence_level': result.get('confidence_level')
+            })
+        return pd.concat([df.reset_index(drop=True), pd.DataFrame(results)], axis=1)
+    def save(self, directory: str = 'model_artifacts'):
+        """Save all model artifacts."""
+        path = Path(directory)
+        path.mkdir(exist_ok=True)
+        joblib.dump(self.model, path / 'model.pkl')
+        joblib.dump(self.preprocessor, path / 'preprocessor.pkl')
+        joblib.dump(self.valid_ranges, path / 'valid_ranges.pkl')
+        print(f"✅ Model artifacts saved to '{directory}/'")
+    @classmethod
+    def load(cls, directory: str = 'model_artifacts'):
+        """Load model artifacts."""
+        path = Path(directory)
+        model = joblib.load(path / 'model.pkl')
+        preprocessor = joblib.load(path / 'preprocessor.pkl')
+        predictor = cls(model, preprocessor)
+        predictor.valid_ranges = joblib.load(path / 'valid_ranges.pkl')
+        print(f"✅ Model loaded from '{directory}/'")
+        return predictor
+# Initialize and save predictor
+predictor = StudentGradePredictor(best_model, preprocessor)
+predictor.save('model_artifacts')
+# =============================================================================
+# 14. INTERACTIVE DEMONSTRATION
+# =============================================================================
+def display_prediction_report(result: dict):
+    """Display a formatted prediction report."""
+    if not result['success']:
+        print(f"\n❌ PREDICTION FAILED: {result['error']}")
+        return
+    print("\n" + "=" * 60)
+    print("    🎓 STUDENT PERFORMANCE PREDICTION REPORT")
+    print("=" * 60)
+    inp = result['input_summary']
+    print(f"\n📋 INPUT PARAMETERS:")
+    print(f"   • Hours Studied:           {inp['hours_studied']:>6} h")
+    print(f"   • Previous Scores:         {inp['previous_scores']:>6}")
+    print(f"   • Sleep Hours:             {inp['sleep_hours']:>6} h/day")
+    print(f"   • Practice Papers:         {inp['sample_papers']:>6}")
+    print(f"   • Extracurricular:         {inp['extracurricular']:>6}")
+    print(f"\n🎯 PREDICTION:")
+    print(f"   • Predicted Grade:         {result['predicted_grade']}")
+    print(f"   • Confidence:              {result['confidence']:.1f}% ({result['confidence_level']})")
+    print(f"\n📊 PROBABILITY DISTRIBUTION:")
+    for grade in ['A', 'B', 'C', 'D', 'F']:
+        prob = result['probability_distribution'].get(grade, 0)
+        bar_length = int(prob / 5)
+        bar = "█" * bar_length
+        print(f"   {grade}: {bar:<20} {prob:>5.1f}%")
+    print(f"\n💡 RECOMMENDATION:")
+    print(f"   {result['recommendation']}")
+    print("=" * 60)
+print("\n" + "🧪 " * 20)
+print("        INTERACTIVE PREDICTION DEMONSTRATIONS")
+print("🧪 " * 20)
+# Test Case 1: High-performing student
+result1 = predictor.predict(
+    hours_studied=9,
+    previous_scores=95,
+    sleep_hours=8,
+    sample_papers=5,
+    extracurricular='Yes'
+)
+display_prediction_report(result1)
+# Test Case 2: Struggling student
+result2 = predictor.predict(
+    hours_studied=2,
+    previous_scores=45,
+    sleep_hours=5,
+    sample_papers=0,
+    extracurricular='No'
+)
+display_prediction_report(result2)
+# Test Case 3: Average student
+result3 = predictor.predict(
+    hours_studied=5,
+    previous_scores=70,
+    sleep_hours=7,
+    sample_papers=2,
+    extracurricular='Yes'
+)
+display_prediction_report(result3)
+# Test Case 4: Edge case - high previous scores but low effort
+result4 = predictor.predict(
+    hours_studied=1,
+    previous_scores=85,
+    sleep_hours=6,
+    sample_papers=1,
+    extracurricular='No'
+)
+display_prediction_report(result4)
+# Test Case 5: Invalid input
+result5 = predictor.predict(
+    hours_studied=-5,
+    previous_scores=150,
+    sleep_hours=30,
+    sample_papers=0,
+    extracurricular='Maybe'
+)
+display_prediction_report(result5)

grade_multiclass/target_distribution.png ADDED Viewed

lr_attendance/2018-2019_Daily_Attendance_20240429.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

lr_attendance/add_weather_features.py ADDED Viewed

	@@ -0,0 +1,195 @@

+import pandas as pd
+import numpy as np
+import requests
+from datetime import datetime
+import time
+# Load the engineered attendance data
+df = pd.read_csv("attendance_with_features.csv")
+# NYC coordinates for Central Park
+NYC_LAT = 40.7789
+NYC_LON = -73.9692
+# Convert date column to datetime if not already
+df["date"] = pd.to_datetime(df["Date"], format="%Y%m%d")
+# Get unique dates from our dataset
+unique_dates = sorted(df["date"].dt.date.unique())
+print(
+    f"Fetching weather data for {len(unique_dates)} unique dates from {unique_dates[0]} to {unique_dates[-1]}"
+)
+def fetch_weather_data(start_date, end_date):
+    """Fetch weather data from Open-Meteo API"""
+    url = "https://archive-api.open-meteo.com/v1/archive"
+    params = {
+        "latitude": NYC_LAT,
+        "longitude": NYC_LON,
+        "start_date": start_date,
+        "end_date": end_date,
+        "daily": [
+            "temperature_2m_max",
+            "temperature_2m_min",
+            "temperature_2m_mean",
+            "precipitation_sum",
+            "rain_sum",
+            "snowfall_sum",
+            "precipitation_hours",
+            "wind_speed_10m_max",
+            "wind_gusts_10m_max",
+            "weather_code",
+            "sunshine_duration",
+            "daylight_duration",
+        ],
+        "timezone": "America/New_York",
+        "temperature_unit": "celsius",
+        "wind_speed_unit": "kmh",
+        "precipitation_unit": "mm",
+    }
+    try:
+        response = requests.get(url, params=params)
+        response.raise_for_status()
+        data = response.json()
+        # Convert to DataFrame
+        weather_df = pd.DataFrame(
+            {
+                "date": pd.to_datetime(data["daily"]["time"]).date,
+                "temp_max": data["daily"]["temperature_2m_max"],
+                "temp_min": data["daily"]["temperature_2m_min"],
+                "temp_mean": data["daily"]["temperature_2m_mean"],
+                "precipitation_total": data["daily"]["precipitation_sum"],
+                "rain_total": data["daily"]["rain_sum"],
+                "snow_total": data["daily"]["snowfall_sum"],
+                "precipitation_hours": data["daily"]["precipitation_hours"],
+                "wind_speed_max": data["daily"]["wind_speed_10m_max"],
+                "wind_gust_max": data["daily"]["wind_gusts_10m_max"],
+                "weather_code": data["daily"]["weather_code"],
+                "sunshine_duration": data["daily"]["sunshine_duration"],
+                "daylight_duration": data["daily"]["daylight_duration"],
+            }
+        )
+        return weather_df
+    except Exception as e:
+        print(f"Error fetching weather data: {e}")
+        return None
+# Split date range into chunks to avoid API limits
+weather_data = []
+chunk_size = 365  # days per request
+for i in range(0, len(unique_dates), chunk_size):
+    chunk_dates = unique_dates[i : i + chunk_size]
+    start_date = chunk_dates[0].strftime("%Y-%m-%d")
+    end_date = chunk_dates[-1].strftime("%Y-%m-%d")
+    print(f"Fetching weather for {start_date} to {end_date}...")
+    chunk_weather = fetch_weather_data(start_date, end_date)
+    if chunk_weather is not None:
+        weather_data.append(chunk_weather)
+    # Rate limiting
+    time.sleep(1)
+# Combine all weather data
+if weather_data:
+    weather_df = pd.concat(weather_data, ignore_index=True)
+    print(f"Successfully fetched weather data for {len(weather_df)} days")
+    # Save weather data
+    weather_df.to_csv("nyc_weather_2018_2019.csv", index=False)
+    print("Weather data saved as 'nyc_weather_2018_2019.csv'")
+    # Merge with attendance data
+    df["date_key"] = df["date"].dt.date
+    weather_df["date_key"] = weather_df["date"]
+    # Merge weather features
+    attendance_with_weather = df.merge(
+        weather_df.drop("date", axis=1), on="date_key", how="left"
+    )
+    # Create weather-related features
+    attendance_with_weather["temp_range"] = (
+        attendance_with_weather["temp_max"] - attendance_with_weather["temp_min"]
+    )
+    attendance_with_weather["is_rainy_day"] = (
+        attendance_with_weather["precipitation_total"] > 2.0
+    ).astype(int)
+    attendance_with_weather["is_snowy_day"] = (
+        attendance_with_weather["snow_total"] > 0.5
+    ).astype(int)
+    attendance_with_weather["is_windy_day"] = (
+        attendance_with_weather["wind_speed_max"] > 20.0
+    ).astype(int)
+    attendance_with_weather["is_extreme_temp"] = (
+        (attendance_with_weather["temp_max"] > 32)
+        | (attendance_with_weather["temp_min"] < -5)
+    ).astype(int)
+    # Weather severity score (0-1, higher = worse conditions)
+    attendance_with_weather["weather_severity"] = (
+        attendance_with_weather["precipitation_total"] / 50  # normalize heavy rain
+        + attendance_with_weather["snow_total"] / 20  # normalize snow
+        + attendance_with_weather["wind_speed_max"] / 50  # normalize wind
+    ).clip(0, 1)
+    print("\nWeather features added:")
+    weather_features = [
+        col
+        for col in attendance_with_weather.columns
+        if col
+        in [
+            "temp_max",
+            "temp_min",
+            "temp_mean",
+            "temp_range",
+            "precipitation_total",
+            "rain_total",
+            "snow_total",
+            "precipitation_hours",
+            "wind_speed_max",
+            "wind_gust_max",
+            "weather_code",
+            "sunshine_duration",
+            "daylight_duration",
+            "is_rainy_day",
+            "is_snowy_day",
+            "is_windy_day",
+            "is_extreme_temp",
+            "weather_severity",
+        ]
+    ]
+    for feature in weather_features:
+        print(f"- {feature}")
+    # Save final dataset
+    attendance_with_weather.to_csv("attendance_features_complete.csv", index=False)
+    print(
+        f"\nFinal dataset with {len(attendance_with_weather.columns)} total features saved as 'attendance_features_complete.csv'"
+    )
+    print("\nSample of weather-related features:")
+    print(
+        attendance_with_weather[
+            [
+                "Date",
+                "attendance_rate",
+                "temp_mean",
+                "precipitation_total",
+                "is_rainy_day",
+                "weather_severity",
+            ]
+        ].head(10)
+    )
+else:
+    print("Failed to fetch weather data")

lr_attendance/best_model_coefficients.csv ADDED Viewed

	@@ -0,0 +1,13 @@

+Feature,Coefficient
+school_avg_attendance,7.137911799297406
+temp_mean,1.4284187566515165
+day_of_week,-0.9758038207498889
+precipitation_hours,-0.7556588622303482
+is_holiday,-0.6112820331872162
+is_snowy_day,-0.6059281530329762
+is_monday,-0.5594793735265182
+school_std_attendance,0.5048872431769107
+is_rainy_day,0.1904241431075877
+days_to_next_holiday,0.13888911829632
+school_year_progress,0.09479814368925972
+is_friday,-0.016717455812732072

lr_attendance/explore_data.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+import holidays
+# Load the data
+df = pd.read_csv("2018-2019_Daily_Attendance_20240429.csv")
+# Basic dataset analysis
+print("Dataset Info:")
+print(f"Total records: {len(df)}")
+print(f"Date range: {df['Date'].min()} to {df['Date'].max()}")
+print(f"Unique schools: {df['School DBN'].nunique()}")
+print("\nColumns:", df.columns.tolist())
+# Check for missing values
+print("\nMissing values:")
+print(df.isnull().sum())
+# Create attendance rate
+df["attendance_rate"] = (df["Present"] / df["Enrolled"]) * 100
+# Basic statistics
+print("\nAttendance Rate Statistics:")
+print(df["attendance_rate"].describe())
+print("\nSample data:")
+print(df.head())

lr_attendance/feature_engineering.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import pandas as pd
+import numpy as np
+from datetime import datetime
+import holidays
+# Load the data
+df = pd.read_csv("2018-2019_Daily_Attendance_20240429.csv")
+# Convert date column to datetime
+df["date"] = pd.to_datetime(df["Date"], format="%Y%m%d")
+# Create attendance rate (target variable)
+df["attendance_rate"] = (df["Present"] / df["Enrolled"]) * 100
+# Extract temporal features
+df["day_of_week"] = df["date"].dt.dayofweek  # 0=Monday, 6=Sunday
+df["day_of_week_name"] = df["date"].dt.day_name()
+df["month"] = df["date"].dt.month
+df["month_name"] = df["date"].dt.month_name()
+df["quarter"] = df["date"].dt.quarter
+df["week_of_year"] = df["date"].dt.isocalendar().week
+df["day_of_month"] = df["date"].dt.day
+df["day_of_year"] = df["date"].dt.dayofyear
+# Season mapping
+def get_season(month):
+    if month in [12, 1, 2]:
+        return "Winter"
+    elif month in [3, 4, 5]:
+        return "Spring"
+    elif month in [6, 7, 8]:
+        return "Summer"
+    else:
+        return "Fall"
+df["season"] = df["month"].apply(get_season)
+# Weekend indicator
+df["is_weekend"] = (df["day_of_week"] >= 5).astype(int)
+# School day indicators (assuming Mon-Fri are school days)
+df["is_school_day"] = (df["day_of_week"] < 5).astype(int)
+# NYC Public School Holidays for 2018-2019 school year
+nyc_holidays_2018_19 = [
+    "2018-09-10",  # Rosh Hashanah (Observed)
+    "2018-09-11",  # Rosh Hashanah (Observed)
+    "2018-09-19",  # Yom Kippur
+    "2018-10-08",  # Columbus Day
+    "2018-11-06",  # Election Day
+    "2018-11-12",  # Veterans Day
+    "2018-11-22",  # Thanksgiving Day
+    "2018-11-23",  # Thanksgiving Recess
+    "2018-12-24",  # Winter Recess
+    "2018-12-25",  # Christmas Day
+    "2018-12-26",  # Winter Recess
+    "2018-12-27",  # Winter Recess
+    "2018-12-28",  # Winter Recess
+    "2018-12-31",  # Winter Recess
+    "2019-01-01",  # New Year's Day
+    "2019-01-02",  # Winter Recess
+    "2019-01-21",  # Dr. Martin Luther King Jr. Day
+    "2019-02-18",  # Midwinter Recess
+    "2019-02-19",  # Midwinter Recess
+    "2019-02-20",  # Midwinter Recess
+    "2019-02-21",  # Midwinter Recess
+    "2019-02-22",  # Midwinter Recess
+    "2019-04-15",  # Spring Recess
+    "2019-04-16",  # Spring Recess
+    "2019-04-17",  # Spring Recess
+    "2019-04-18",  # Spring Recess
+    "2019-04-19",  # Spring Recess
+    "2019-04-22",  # Spring Recess
+    "2019-04-23",  # Spring Recess
+    "2019-04-24",  # Spring Recess
+    "2019-04-25",  # Spring Recess
+    "2019-05-27",  # Memorial Day
+    "2019-06-06",  # Chancellor's Conference Day
+    "2019-06-11",  # Anniversary Day
+]
+# Convert to datetime
+holiday_dates = pd.to_datetime(nyc_holidays_2018_19)
+# Add holiday indicators
+df["is_holiday"] = df["date"].isin(holiday_dates.tolist()).astype(int)
+# Add proximity to holiday features
+df["days_to_next_holiday"] = 0
+df["days_since_last_holiday"] = 0
+for idx, row in df.iterrows():
+    current_date = row["date"]
+    # Days to next holiday
+    future_holidays = holiday_dates[holiday_dates > current_date]
+    if len(future_holidays) > 0:
+        df.loc[idx, "days_to_next_holiday"] = (
+            future_holidays.min() - current_date
+        ).days
+    # Days since last holiday
+    past_holidays = holiday_dates[holiday_dates < current_date]
+    if len(past_holidays) > 0:
+        df.loc[idx, "days_since_last_holiday"] = (
+            current_date - past_holidays.max()
+        ).days
+# Special events/conditions that might affect attendance
+df["is_month_start"] = (df["day_of_month"] <= 3).astype(int)
+df["is_month_end"] = (df["day_of_month"] >= 28).astype(int)
+df["is_friday"] = (df["day_of_week"] == 4).astype(int)
+df["is_monday"] = (df["day_of_week"] == 0).astype(int)
+# Progress through school year (normalized)
+school_year_start = pd.to_datetime("2018-09-04")
+school_year_end = pd.to_datetime("2019-06-26")
+df["school_year_progress"] = (
+    (df["date"] - school_year_start).dt.days
+    / (school_year_end - school_year_start).days
+).clip(0, 1)
+print("Feature Engineering Complete!")
+print(f"Total features created: {len(df.columns)}")
+print("\nNew features added:")
+new_features = [
+    col
+    for col in df.columns
+    if col not in ["School DBN", "Date", "Enrolled", "Absent", "Present", "Released"]
+]
+for feature in new_features:
+    print(f"- {feature}")
+print("\nSample of engineered features:")
+print(
+    df[
+        [
+            "Date",
+            "attendance_rate",
+            "day_of_week_name",
+            "month_name",
+            "season",
+            "is_holiday",
+            "days_to_next_holiday",
+            "is_friday",
+        ]
+    ].head(10)
+)
+# Save engineered dataset
+df.to_csv("attendance_with_features.csv", index=False)
+print("\nDataset saved as 'attendance_with_features.csv'")

lr_attendance/feature_info.json ADDED Viewed

	@@ -0,0 +1,118 @@

+{
+  "final_features": [
+    "day_of_week",
+    "month",
+    "quarter",
+    "week_of_year",
+    "day_of_month",
+    "day_of_year",
+    "is_weekend",
+    "is_school_day",
+    "is_month_start",
+    "is_month_end",
+    "is_friday",
+    "is_monday",
+    "school_year_progress",
+    "is_holiday",
+    "days_to_next_holiday",
+    "days_since_last_holiday",
+    "temp_max",
+    "temp_min",
+    "temp_mean",
+    "temp_range",
+    "precipitation_total",
+    "rain_total",
+    "snow_total",
+    "precipitation_hours",
+    "wind_speed_max",
+    "wind_gust_max",
+    "sunshine_duration",
+    "daylight_duration",
+    "is_rainy_day",
+    "is_snowy_day",
+    "is_windy_day",
+    "is_extreme_temp",
+    "weather_severity",
+    "temp_humidity_interaction",
+    "wind_precip_interaction",
+    "holiday_weather_interaction",
+    "temp_squared",
+    "precipitation_squared",
+    "season_encoded"
+  ],
+  "temporal_features": [
+    "day_of_week",
+    "month",
+    "quarter",
+    "week_of_year",
+    "day_of_month",
+    "day_of_year",
+    "is_weekend",
+    "is_school_day",
+    "is_month_start",
+    "is_month_end",
+    "is_friday",
+    "is_monday",
+    "school_year_progress"
+  ],
+  "holiday_features": [
+    "is_holiday",
+    "days_to_next_holiday",
+    "days_since_last_holiday"
+  ],
+  "weather_features": [
+    "temp_max",
+    "temp_min",
+    "temp_mean",
+    "temp_range",
+    "precipitation_total",
+    "rain_total",
+    "snow_total",
+    "precipitation_hours",
+    "wind_speed_max",
+    "wind_gust_max",
+    "sunshine_duration",
+    "daylight_duration",
+    "is_rainy_day",
+    "is_snowy_day",
+    "is_windy_day",
+    "is_extreme_temp",
+    "weather_severity"
+  ],
+  "target_correlations": {
+    "attendance_rate": 1.0,
+    "school_year_progress": 0.15298705946434624,
+    "quarter": 0.09570330953211145,
+    "daylight_duration": 0.08974924827192297,
+    "is_snowy_day": 0.0851888567499552,
+    "days_to_next_holiday": 0.08037939994225182,
+    "month": 0.07982450259808478,
+    "week_of_year": 0.07477659348367248,
+    "day_of_year": 0.07443173468815607,
+    "day_of_month": 0.06875319490990786,
+    "snow_total": 0.06750581579691282,
+    "is_friday": 0.0642459731182472,
+    "is_holiday": 0.05859963560671229,
+    "precipitation_hours": 0.05855321374409788,
+    "is_rainy_day": 0.054753317922505866,
+    "precipitation_total": 0.0546754489544741,
+    "is_extreme_temp": 0.04877986561324521,
+    "day_of_week": 0.04745618753325818,
+    "rain_total": 0.04531486647524365,
+    "is_month_end": 0.03061269671737044,
+    "days_since_last_holiday": 0.029162742921903476,
+    "weather_severity": 0.027970448352187427,
+    "is_month_start": 0.023797389694042725,
+    "temp_mean": 0.013792886147146688,
+    "temp_min": 0.013129987102531162,
+    "temp_max": 0.012837652673607865,
+    "wind_gust_max": 0.010101066578409116,
+    "is_monday": 0.009588967551783255,
+    "sunshine_duration": 0.003997196985383076,
+    "wind_speed_max": 0.0018488325630909438,
+    "temp_range": 0.0014876996432690002,
+    "is_windy_day": 0.0009326180610937769,
+    "is_weekend": null,
+    "is_school_day": null
+  }
+}

lr_attendance/final_coefficients.csv ADDED Viewed

	@@ -0,0 +1,13 @@

+Feature,Coefficient
+school_avg_attendance,7.171348715713736
+school_year_progress,-1.5605297921826944
+days_to_next_holiday,0.7496704390789359
+is_snowy_day,-0.66548297262725
+precipitation_hours,-0.4631543689630163
+is_friday,-0.3230532695248619
+day_of_week,-0.3176427595205633
+temp_mean,0.2923542560616456
+is_holiday,-0.24976999079401288
+is_monday,-0.21064021730157015
+is_rainy_day,0.037596682642466635
+school_std_attendance,-0.0019548833102194414

lr_attendance/final_predictions.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

lr_attendance/improved_predictions.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

lr_attendance/model_comparison.csv ADDED Viewed

	@@ -0,0 +1,5 @@

+Model,R²,RMSE,MAE
+A: Baseline (no school),-0.19632912335295583,13.229195648314445,7.397234417574075
+B: With School Avg ⭐,0.23948647822468483,10.547792357860521,5.784459632103281
+C: Predict Deviation,0.256636459570448,10.428185068430691,5.835936867686833
+D: Log Transform,0.238213937586648,10.556613295212724,5.9642873221393735

lr_attendance/model_summary.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ r2_train,r2_test,rmse_test,mae_test,cv_r2_mean,cv_r2_std,n_train,n_test,n_features
2	+ 0.6090720669046676,0.609192651642787,5.943806665659979,3.1503211971515586,0.6089838172103861,0.004961794857370117,221720,55431,12

lr_attendance/nyc_weather_2018_2019.csv ADDED Viewed

	@@ -0,0 +1,297 @@

+date,temp_max,temp_min,temp_mean,precipitation_total,rain_total,snow_total,precipitation_hours,wind_speed_max,wind_gust_max,weather_code,sunshine_duration,daylight_duration
+2018-09-04,31.2,23.9,27.5,0.0,0.0,0.0,0.0,10.5,21.2,3,43013.66,46669.32
+2018-09-05,29.4,23.2,26.2,1.1,1.1,0.0,4.0,17.9,34.9,53,42916.46,46511.89
+2018-09-06,32.3,23.6,26.8,4.6,4.6,0.0,7.0,17.6,38.5,61,33312.05,46353.66
+2018-09-07,24.3,20.7,22.8,4.9,4.9,0.0,16.0,16.1,31.3,53,1282.37,46194.73
+2018-09-08,21.1,15.6,18.8,6.9,6.9,0.0,12.0,18.4,36.7,55,2515.72,46035.2
+2018-09-09,17.2,13.7,15.3,17.9,17.9,0.0,24.0,18.8,37.4,61,0.0,45875.18
+2018-09-10,17.5,13.6,16.0,30.8,30.8,0.0,24.0,28.8,56.5,63,0.0,45714.75
+2018-09-11,24.4,17.3,20.9,14.7,14.7,0.0,11.0,17.0,36.0,63,4967.27,45554.02
+2018-09-12,25.5,20.6,22.8,17.3,17.3,0.0,16.0,10.8,23.4,63,3325.69,45393.1
+2018-09-13,24.3,20.7,22.2,29.5,29.5,0.0,9.0,17.4,34.2,65,24258.51,45232.07
+2018-09-14,23.9,20.2,21.6,0.4,0.4,0.0,4.0,15.8,32.0,51,3740.55,45071.04
+2018-09-15,24.9,18.3,21.4,0.3,0.3,0.0,3.0,10.1,19.4,51,32685.79,44910.09
+2018-09-16,27.4,18.1,21.7,0.0,0.0,0.0,0.0,14.1,28.4,3,34611.62,44749.33
+2018-09-17,24.7,18.1,22.0,4.8,4.8,0.0,14.0,18.9,39.2,53,6915.09,44588.84
+2018-09-18,25.1,20.4,22.7,20.5,20.5,0.0,14.0,21.6,38.5,63,9241.86,44428.73
+2018-09-19,26.6,19.4,22.4,1.7,1.7,0.0,6.0,16.6,33.5,53,39495.63,44269.07
+2018-09-20,20.8,17.8,19.4,3.2,3.2,0.0,12.0,10.9,23.8,53,11825.79,44109.98
+2018-09-21,23.0,19.4,20.8,0.0,0.0,0.0,0.0,23.2,44.3,3,2846.96,43951.14
+2018-09-22,22.2,16.0,20.0,2.4,2.4,0.0,3.0,23.1,46.1,61,35033.01,43791.55
+2018-09-23,18.8,14.8,16.6,1.9,1.9,0.0,13.0,11.9,24.1,51,0.0,43631.24
+2018-09-24,20.1,14.1,16.8,0.1,0.1,0.0,1.0,23.3,48.2,51,24689.51,43470.31
+2018-09-25,21.8,14.3,18.9,61.4,61.4,0.0,22.0,28.1,56.2,65,0.0,43308.86
+2018-09-26,26.3,20.6,23.1,10.9,10.9,0.0,11.0,22.5,47.2,63,35389.68,43147.0
+2018-09-27,21.8,15.0,18.4,5.7,5.7,0.0,6.0,22.2,39.6,61,32400.0,42984.82
+2018-09-28,19.5,13.9,16.4,29.3,29.3,0.0,9.0,27.3,63.4,63,19786.42,42822.43
+2018-09-29,22.2,13.7,17.5,0.0,0.0,0.0,0.0,11.8,25.2,3,38584.73,42659.93
+2018-09-30,20.2,12.2,16.3,0.0,0.0,0.0,0.0,13.6,26.3,3,38508.36,42497.42
+2018-10-01,25.0,15.5,19.9,0.9,0.9,0.0,3.0,19.2,38.9,51,26480.82,42335.0
+2018-10-02,24.8,17.1,20.7,32.9,32.9,0.0,11.0,16.2,34.9,65,22627.44,42172.78
+2018-10-03,24.4,16.4,20.4,0.0,0.0,0.0,0.0,18.2,33.8,3,37493.02,42010.86
+2018-10-04,25.8,15.2,20.5,1.8,1.8,0.0,3.0,18.9,38.2,61,34062.29,41849.33
+2018-10-05,20.1,13.9,17.2,0.0,0.0,0.0,0.0,21.5,40.0,3,38192.4,41688.32
+2018-10-06,19.8,17.5,18.6,0.6,0.6,0.0,4.0,19.1,36.0,51,287.69,41527.91
+2018-10-07,26.5,19.3,22.3,0.1,0.1,0.0,1.0,16.1,29.9,51,10800.0,41368.22
+2018-10-08,19.8,17.7,18.8,0.8,0.8,0.0,5.0,17.8,34.2,51,0.0,41209.36
+2018-10-09,25.0,19.2,21.6,0.5,0.5,0.0,5.0,15.3,29.2,51,20834.79,41051.43
+2018-10-10,25.5,20.5,22.2,0.6,0.6,0.0,6.0,16.9,32.4,51,27016.84,40894.55
+2018-10-11,25.1,20.8,22.4,14.1,14.1,0.0,15.0,30.3,55.8,61,5102.43,40738.38
+2018-10-12,21.0,10.8,15.2,6.5,6.5,0.0,5.0,32.9,60.8,63,32103.53,40581.84
+2018-10-13,13.6,8.6,10.8,3.8,3.8,0.0,8.0,21.7,41.8,53,11341.76,40424.94
+2018-10-14,14.6,6.8,10.9,0.0,0.0,0.0,0.0,14.6,29.5,3,23733.44,40267.82
+2018-10-15,20.0,9.5,15.8,5.3,5.3,0.0,12.0,27.2,55.1,61,1142.64,40110.58
+2018-10-16,15.9,8.1,11.1,0.5,0.5,0.0,2.0,29.0,52.2,51,33615.7,39953.36
+2018-10-17,16.3,7.6,11.2,0.0,0.0,0.0,0.0,30.4,56.2,3,35979.66,39796.27
+2018-10-18,9.6,4.7,7.0,0.0,0.0,0.0,0.0,23.6,46.1,1,36329.9,39639.44
+2018-10-19,14.6,4.2,9.6,0.3,0.3,0.0,1.0,20.5,39.2,51,34590.27,39482.98
+2018-10-20,17.1,9.1,13.1,6.1,6.1,0.0,7.0,23.1,47.5,63,27650.76,39327.03
+2018-10-21,9.2,3.8,7.0,0.3,0.3,0.0,2.0,33.1,63.0,51,27771.21,39171.71
+2018-10-22,10.6,3.5,7.2,0.0,0.0,0.0,0.0,17.1,35.3,3,31360.17,39017.16
+2018-10-23,16.7,7.1,11.3,0.1,0.1,0.0,1.0,18.7,38.5,51,32036.72,38863.5
+2018-10-24,13.1,6.1,9.0,0.0,0.0,0.0,0.0,30.3,56.9,3,34822.13,38710.88
+2018-10-25,10.4,3.9,6.6,0.0,0.0,0.0,0.0,19.6,39.2,3,34795.7,38559.43
+2018-10-26,10.1,2.5,6.5,0.2,0.2,0.0,1.0,18.0,32.8,51,32794.06,38409.29
+2018-10-27,11.5,6.5,9.0,40.5,40.5,0.0,24.0,36.4,72.0,63,0.0,38260.59
+2018-10-28,11.6,6.8,9.3,1.3,1.3,0.0,5.0,17.2,39.2,53,16467.77,38113.5
+2018-10-29,13.6,7.2,10.4,2.0,2.0,0.0,7.0,23.3,44.3,53,29238.26,37968.17
+2018-10-30,12.4,4.2,7.7,0.0,0.0,0.0,0.0,20.7,41.0,0,34033.08,37824.72
+2018-10-31,17.0,4.4,11.2,0.0,0.0,0.0,0.0,22.5,43.2,3,32941.03,37682.86
+2018-11-01,20.0,11.0,15.6,0.1,0.1,0.0,1.0,23.2,41.4,51,30837.66,37541.34
+2018-11-02,20.9,17.2,19.1,10.1,10.1,0.0,19.0,29.4,62.6,61,0.0,37400.25
+2018-11-03,18.5,7.3,13.3,8.1,8.1,0.0,7.0,33.8,79.9,61,22803.61,37259.71
+2018-11-04,11.8,4.7,7.8,0.0,0.0,0.0,0.0,11.3,20.5,3,33370.03,37119.88
+2018-11-05,11.4,7.1,9.8,6.2,6.2,0.0,12.0,22.0,40.3,61,0.0,36980.89
+2018-11-06,16.6,9.4,12.2,13.6,13.6,0.0,10.0,25.8,76.0,63,0.0,36842.9
+2018-11-07,16.8,8.2,12.8,0.0,0.0,0.0,0.0,23.1,43.9,3,32248.07,36706.06
+2018-11-08,12.2,5.2,8.8,0.0,0.0,0.0,0.0,18.2,32.0,3,30385.53,36570.53
+2018-11-09,10.4,4.0,7.5,14.9,14.9,0.0,9.0,24.3,47.2,63,5898.37,36436.46
+2018-11-10,9.6,1.1,5.8,0.7,0.7,0.0,4.0,33.4,63.0,51,32483.88,36304.03
+2018-11-11,7.6,0.1,3.2,0.0,0.0,0.0,0.0,17.4,36.4,0,32247.95,36173.38
+2018-11-12,8.8,0.2,4.7,1.1,1.1,0.0,2.0,12.6,26.3,55,28714.71,36044.69
+2018-11-13,10.3,5.7,7.8,18.5,18.5,0.0,12.0,25.3,50.8,63,0.0,35918.14
+2018-11-14,5.2,-0.2,2.4,0.0,0.0,0.0,0.0,26.2,51.5,3,31707.88,35793.89
+2018-11-15,3.9,-1.5,0.7,22.6,6.4,11.34,11.0,35.0,61.9,75,0.0,35672.12
+2018-11-16,6.2,1.8,4.1,9.6,7.5,1.47,10.0,39.1,82.8,75,28800.0,35553.02
+2018-11-17,8.7,0.8,4.4,0.0,0.0,0.0,0.0,20.2,42.1,3,30982.55,35436.78
+2018-11-18,5.6,1.1,3.8,0.4,0.2,0.14,2.0,11.3,22.0,71,9379.0,35323.56
+2018-11-19,11.6,2.7,6.5,0.0,0.0,0.0,0.0,9.5,19.1,3,20971.63,35213.56
+2018-11-20,8.1,2.2,5.4,0.5,0.5,0.0,1.0,22.7,42.5,53,21543.86,35106.54
+2018-11-21,7.3,-3.0,2.8,0.2,0.2,0.0,2.0,28.5,55.8,51,31189.29,35001.36
+2018-11-22,-3.2,-7.7,-5.4,0.0,0.0,0.0,0.0,27.8,53.3,3,31141.97,34898.11
+2018-11-23,-0.8,-8.4,-4.5,0.0,0.0,0.0,0.0,16.9,34.2,3,31044.16,34796.89
+2018-11-24,7.9,-2.6,3.2,16.1,16.1,0.0,7.0,20.9,40.3,63,11437.53,34697.86
+2018-11-25,12.2,4.2,7.9,22.8,22.8,0.0,6.0,33.4,59.8,65,25200.0,34601.13
+2018-11-26,10.8,3.7,7.4,30.2,30.2,0.0,16.0,35.3,63.7,65,477.6,34506.86
+2018-11-27,7.0,3.0,4.9,1.8,1.8,0.0,4.0,32.5,61.2,53,12559.81,34415.18
+2018-11-28,5.2,2.0,3.5,0.0,0.0,0.0,0.0,36.6,68.8,3,17004.59,34326.23
+2018-11-29,6.4,1.8,4.3,0.0,0.0,0.0,0.0,31.4,59.8,3,29901.48,34240.14
+2018-11-30,4.8,-0.1,2.8,1.3,1.0,0.21,3.0,9.4,31.7,71,9364.26,34157.08
+2018-12-01,6.3,-1.0,3.3,3.0,3.0,0.0,6.0,12.6,22.0,53,26881.31,34077.17
+2018-12-02,14.6,5.2,10.6,16.6,16.6,0.0,21.0,18.0,35.3,63,0.0,34000.57
+2018-12-03,12.8,6.2,10.1,0.0,0.0,0.0,0.0,26.3,48.6,3,26693.77,33927.42
+2018-12-04,6.2,-2.0,2.0,0.0,0.0,0.0,0.0,23.0,44.6,3,29895.79,33857.86
+2018-12-05,2.2,-3.8,-1.1,0.0,0.0,0.0,0.0,13.6,28.1,3,20367.7,33792.06
+2018-12-06,4.2,-3.0,0.7,0.0,0.0,0.0,0.0,21.4,41.0,3,25925.27,33730.13
+2018-12-07,2.7,-4.2,0.2,0.0,0.0,0.0,0.0,21.9,43.9,1,29926.36,33672.25
+2018-12-08,1.9,-4.9,-1.8,0.0,0.0,0.0,0.0,19.8,39.2,3,19941.12,33618.54
+2018-12-09,1.6,-4.4,-1.4,0.0,0.0,0.0,0.0,12.3,27.4,3,20439.16,33569.16
+2018-12-10,3.8,-3.5,-0.7,0.0,0.0,0.0,0.0,17.2,36.0,3,29698.45,33524.03
+2018-12-11,3.7,-3.9,-0.7,0.0,0.0,0.0,0.0,18.7,33.8,1,29673.37,33482.58
+2018-12-12,5.1,-2.0,1.1,0.0,0.0,0.0,0.0,17.1,34.9,3,24647.14,33444.81
+2018-12-13,3.8,-0.9,1.6,0.9,0.5,0.28,6.0,19.8,37.1,71,6975.15,33410.76
+2018-12-14,9.9,2.8,6.3,1.4,1.4,0.0,5.0,15.5,29.9,51,6838.5,33380.45
+2018-12-15,9.6,5.9,7.6,4.7,4.7,0.0,11.0,25.3,43.2,53,0.0,33353.91
+2018-12-16,6.7,1.6,3.9,23.1,21.7,0.98,22.0,28.0,51.1,73,0.0,33331.19
+2018-12-17,7.6,1.8,4.1,0.6,0.6,0.0,3.0,29.3,52.9,51,18524.88,33312.29
+2018-12-18,2.3,-3.3,-0.4,0.0,0.0,0.0,0.0,29.8,53.6,3,29200.12,33297.25
+2018-12-19,4.7,-4.6,-0.6,0.0,0.0,0.0,0.0,13.8,26.6,3,29139.29,33286.09
+2018-12-20,10.6,-2.4,3.4,7.7,7.7,0.0,9.0,18.5,31.3,61,21520.87,33278.84
+2018-12-21,14.6,10.0,12.4,43.6,43.6,0.0,16.0,38.6,82.8,65,0.0,33275.5
+2018-12-22,9.9,3.9,6.5,3.3,3.3,0.0,3.0,31.1,60.5,61,18855.64,33276.11
+2018-12-23,5.8,-0.2,2.7,0.0,0.0,0.0,0.0,22.9,45.0,3,28909.64,33280.67
+2018-12-24,6.8,0.6,2.9,2.6,0.7,1.33,11.0,19.3,36.7,73,18146.82,33289.2
+2018-12-25,4.7,-1.4,1.2,0.0,0.0,0.0,0.0,18.6,38.9,3,25764.34,33301.71
+2018-12-26,5.8,-2.5,1.0,0.0,0.0,0.0,0.0,18.0,34.2,3,16403.13,33318.21
+2018-12-27,4.1,0.1,1.9,0.0,0.0,0.0,0.0,16.0,28.4,3,28955.64,33338.71
+2018-12-28,13.4,4.9,10.2,28.5,28.5,0.0,20.0,27.7,62.3,63,0.0,33363.21
+2018-12-29,12.4,1.9,8.2,0.1,0.1,0.0,1.0,25.8,49.0,51,28989.88,33391.73
+2018-12-30,5.2,-0.6,1.7,0.3,0.0,0.21,2.0,13.1,24.1,71,12333.29,33424.39
+2018-12-31,7.9,-1.1,3.4,22.1,22.1,0.0,10.0,24.5,43.6,63,5883.96,33461.48
+2019-01-01,14.2,3.2,9.4,3.1,3.1,0.0,3.0,38.0,67.7,61,18452.99,33502.93
+2019-01-02,3.7,-0.6,1.9,0.0,0.0,0.0,0.0,15.8,28.4,3,19357.07,33548.61
+2019-01-03,7.1,0.3,4.2,0.0,0.0,0.0,0.0,24.2,47.2,3,22127.57,33598.4
+2019-01-04,8.0,-1.4,2.8,0.0,0.0,0.0,0.0,19.6,37.8,3,24464.69,33652.18
+2019-01-05,6.7,3.0,5.4,20.6,20.6,0.0,18.0,22.7,42.1,63,0.0,33709.84
+2019-01-06,8.6,-1.0,3.8,0.2,0.2,0.0,1.0,30.0,56.9,51,28503.95,33771.23
+2019-01-07,0.2,-5.3,-2.1,0.2,0.1,0.07,2.0,17.2,32.4,71,23996.83,33836.26
+2019-01-08,8.2,0.4,4.3,0.2,0.2,0.0,2.0,13.4,33.8,51,10552.66,33904.79
+2019-01-09,7.1,0.3,4.4,3.6,3.6,0.0,4.0,34.6,65.2,61,25488.23,33976.69
+2019-01-10,0.8,-2.1,-0.4,0.0,0.0,0.0,0.0,36.7,68.4,3,20963.05,34051.85
+2019-01-11,-1.4,-5.9,-3.9,0.0,0.0,0.0,0.0,26.3,51.5,3,29951.96,34130.14
+2019-01-12,2.3,-6.4,-2.8,0.0,0.0,0.0,0.0,10.6,20.9,3,29357.16,34211.44
+2019-01-13,0.3,-4.1,-2.5,0.8,0.0,0.56,6.0,17.3,32.8,71,25903.73,34295.64
+2019-01-14,1.1,-6.4,-3.7,0.0,0.0,0.0,0.0,20.3,37.1,3,30076.59,34382.6
+2019-01-15,3.5,-4.8,-1.9,0.0,0.0,0.0,0.0,9.5,23.4,3,29494.34,34472.21
+2019-01-16,4.6,-4.1,-0.2,0.0,0.0,0.0,0.0,22.4,43.9,3,30002.61,34564.36
+2019-01-17,-0.2,-5.5,-2.6,0.3,0.0,0.21,1.0,18.2,33.1,73,22266.33,34658.92
+2019-01-18,3.6,-2.2,0.3,1.4,0.0,0.98,8.0,12.3,21.6,73,13850.21,34755.77
+2019-01-19,3.2,-3.7,-0.1,5.3,0.6,3.29,4.0,19.7,35.3,75,14658.24,34855.22
+2019-01-20,8.8,-11.5,-0.7,29.6,27.3,1.61,13.0,31.3,61.2,75,6425.02,34958.25
+2019-01-21,-11.9,-15.8,-13.4,0.0,0.0,0.0,0.0,33.3,62.6,3,30776.06,35064.75
+2019-01-22,-2.2,-12.1,-7.3,0.0,0.0,0.0,0.0,19.5,36.0,3,30510.7,35174.51
+2019-01-23,7.7,-3.5,1.8,0.1,0.1,0.0,1.0,22.9,40.7,51,11909.11,35287.37
+2019-01-24,12.6,0.1,8.4,34.4,34.4,0.0,18.0,42.9,92.2,63,0.0,35403.12
+2019-01-25,2.8,-3.8,-1.0,0.0,0.0,0.0,0.0,31.6,59.0,3,26179.88,35521.59
+2019-01-26,1.0,-6.3,-2.9,0.0,0.0,0.0,0.0,13.0,29.5,3,31924.3,35642.61
+2019-01-27,9.6,-2.7,3.0,0.0,0.0,0.0,0.0,28.8,55.4,3,28750.15,35765.98
+2019-01-28,1.3,-5.8,-2.8,0.0,0.0,0.0,0.0,18.9,34.6,3,31577.17,35891.54
+2019-01-29,4.3,-5.8,-0.2,5.7,5.1,0.42,12.0,22.8,40.3,73,0.0,36019.12
+2019-01-30,0.3,-14.8,-6.6,0.4,0.0,0.28,3.0,38.4,71.6,71,26884.57,36148.55
+2019-01-31,-8.4,-16.6,-12.5,0.0,0.0,0.0,0.0,21.3,44.6,0,32793.02,36279.68
+2019-02-01,-4.8,-12.1,-8.8,0.0,0.0,0.0,0.0,13.2,24.1,3,27911.26,36412.32
+2019-02-02,2.5,-9.9,-4.2,0.0,0.0,0.0,0.0,22.8,43.9,3,32403.46,36546.34
+2019-02-03,10.0,-3.5,1.9,0.0,0.0,0.0,0.0,16.6,32.4,2,33226.9,36681.56
+2019-02-04,11.9,-1.8,3.9,0.0,0.0,0.0,0.0,14.1,30.6,1,33373.6,36817.85
+2019-02-05,17.6,0.7,7.5,0.0,0.0,0.0,0.0,17.4,29.9,3,31915.86,36955.05
+2019-02-06,7.1,-1.3,3.0,9.9,9.9,0.0,6.0,17.0,31.0,63,27012.72,37093.02
+2019-02-07,7.8,2.4,5.0,5.1,5.1,0.0,5.0,15.6,27.0,61,3400.59,37231.61
+2019-02-08,11.2,-0.9,5.4,9.0,9.0,0.0,12.0,33.3,61.6,61,17612.41,37371.16
+2019-02-09,0.6,-5.2,-3.0,0.0,0.0,0.0,0.0,31.8,62.3,2,34118.37,37512.93
+2019-02-10,2.8,-6.2,-2.2,0.0,0.0,0.0,0.0,13.2,28.1,3,34271.85,37656.82
+2019-02-11,2.8,-2.0,-0.0,0.3,0.0,0.21,3.0,11.6,23.0,71,12777.81,37802.67
+2019-02-12,0.9,-2.8,-1.3,16.4,10.9,3.85,16.0,26.7,49.3,75,0.0,37950.3
+2019-02-13,5.7,-1.8,2.0,2.2,2.2,0.0,4.0,34.9,63.7,53,21005.38,38099.58
+2019-02-14,7.1,-3.0,1.5,0.0,0.0,0.0,0.0,19.6,39.2,3,34822.73,38250.33
+2019-02-15,13.0,2.8,8.3,0.3,0.3,0.0,3.0,25.2,42.1,51,22442.48,38402.41
+2019-02-16,5.8,-2.3,1.8,0.0,0.0,0.0,0.0,20.6,45.0,3,34997.9,38555.66
+2019-02-17,2.3,-5.4,-1.2,1.1,1.1,0.0,3.0,14.8,31.3,53,35076.93,38709.95
+2019-02-18,6.4,-3.8,0.9,4.0,2.7,0.91,9.0,29.0,55.1,73,10243.74,38865.12
+2019-02-19,2.4,-5.6,-2.4,0.0,0.0,0.0,0.0,19.5,36.0,3,35339.56,39021.04
+2019-02-20,-0.8,-4.2,-2.7,12.0,4.1,5.53,13.0,16.3,47.9,75,0.0,39177.56
+2019-02-21,11.3,-0.1,4.4,3.7,3.7,0.0,4.0,19.8,38.9,61,35080.43,39334.55
+2019-02-22,7.0,-0.4,3.1,0.0,0.0,0.0,0.0,17.3,34.2,3,27432.9,39491.88
+2019-02-23,4.6,-2.5,1.1,1.5,1.5,0.0,3.0,11.9,24.8,53,20485.7,39649.42
+2019-02-24,9.4,2.1,6.4,19.9,19.9,0.0,13.0,35.5,68.8,63,1091.64,39807.03
+2019-02-25,5.0,-3.0,1.7,0.0,0.0,0.0,0.0,44.6,82.4,3,33542.79,39964.58
+2019-02-26,3.0,-4.8,-2.1,0.0,0.0,0.0,0.0,25.3,50.8,3,36472.96,40121.95
+2019-02-27,-1.7,-5.5,-3.7,0.5,0.0,0.35,4.0,18.3,36.7,71,14046.06,40279.02
+2019-02-28,2.7,-4.2,-1.1,0.3,0.0,0.21,2.0,26.1,51.5,71,34680.77,40436.11
+2019-03-01,2.8,-3.5,-0.6,2.1,0.0,1.47,6.0,13.3,26.3,73,7248.83,40594.44
+2019-03-02,3.7,-1.0,1.3,9.5,0.0,6.72,9.0,18.4,33.8,75,25391.91,40753.93
+2019-03-03,5.7,-0.9,1.9,10.0,0.3,6.79,7.0,14.5,28.1,75,28063.99,40914.49
+2019-03-04,2.3,-5.7,-0.2,16.1,0.0,11.27,6.0,21.3,38.2,75,31227.24,41075.97
+2019-03-05,-1.6,-14.6,-6.2,0.0,0.0,0.0,0.0,19.0,34.2,2,37628.05,41238.26
+2019-03-06,-3.0,-9.8,-6.0,0.0,0.0,0.0,0.0,26.0,51.1,3,29823.71,41401.25
+2019-03-07,0.6,-9.2,-4.5,0.0,0.0,0.0,0.0,26.3,51.5,3,32797.89,41564.82
+2019-03-08,2.6,-7.2,-2.0,0.0,0.0,0.0,0.0,14.3,29.5,3,34844.63,41728.84
+2019-03-09,8.2,-4.1,1.4,0.0,0.0,0.0,0.0,13.0,23.8,3,38169.29,41893.22
+2019-03-10,7.8,1.1,3.6,14.0,11.1,2.03,10.0,24.2,46.8,75,0.0,42057.83
+2019-03-11,10.6,2.0,6.3,0.0,0.0,0.0,0.0,26.0,49.7,3,38610.58,42222.57
+2019-03-12,5.0,-1.5,1.7,0.0,0.0,0.0,0.0,25.0,49.0,3,38898.82,42387.31
+2019-03-13,6.6,-4.9,1.7,0.0,0.0,0.0,0.0,17.2,33.5,3,33571.29,42551.96
+2019-03-14,11.1,0.8,6.8,0.0,0.0,0.0,0.0,21.5,40.3,3,38560.82,42716.4
+2019-03-15,21.8,9.8,15.3,20.4,20.4,0.0,7.0,29.4,53.3,65,17998.09,42880.52
+2019-03-16,10.3,1.7,6.6,0.0,0.0,0.0,0.0,29.9,57.2,3,37108.74,43044.21
+2019-03-17,5.9,-2.0,1.7,0.0,0.0,0.0,0.0,20.0,40.0,2,40026.87,43207.36
+2019-03-18,7.1,-4.1,1.5,0.0,0.0,0.0,0.0,16.9,34.9,3,35062.98,43369.86
+2019-03-19,8.0,-3.2,2.6,0.0,0.0,0.0,0.0,16.2,33.5,3,32147.55,43531.59
+2019-03-20,9.9,-1.5,4.1,0.0,0.0,0.0,0.0,24.6,47.2,3,39737.39,43692.88
+2019-03-21,8.6,3.0,5.9,18.3,18.3,0.0,20.0,25.9,51.8,63,0.0,43854.82
+2019-03-22,9.3,2.7,5.8,13.2,13.2,0.0,14.0,37.7,68.8,63,13580.49,44017.36
+2019-03-23,8.4,-0.2,3.7,0.3,0.0,0.21,3.0,34.2,62.6,71,31687.92,44180.42
+2019-03-24,13.3,-1.4,6.5,0.0,0.0,0.0,0.0,20.8,39.2,3,28146.08,44343.9
+2019-03-25,9.1,3.0,7.6,1.1,1.1,0.0,5.0,17.6,36.4,51,3276.7,44507.68
+2019-03-26,7.9,-2.5,2.8,0.0,0.0,0.0,0.0,21.8,42.5,1,41493.85,44671.69
+2019-03-27,6.4,-2.6,1.9,0.0,0.0,0.0,0.0,15.8,33.1,3,41656.98,44835.8
+2019-03-28,8.8,-2.5,4.1,0.0,0.0,0.0,0.0,24.9,48.2,3,39910.41,44999.93
+2019-03-29,12.6,5.0,9.1,0.8,0.8,0.0,5.0,12.2,25.6,51,921.16,45163.97
+2019-03-30,16.2,6.8,11.4,0.0,0.0,0.0,0.0,29.7,54.7,3,33658.74,45327.82
+2019-03-31,17.6,3.5,10.2,4.6,4.6,0.0,5.0,26.4,52.9,61,19279.57,45491.37
+2019-04-01,7.4,-0.9,3.0,0.0,0.0,0.0,0.0,31.3,59.4,3,42467.27,45654.52
+2019-04-02,8.7,-2.0,4.2,1.0,1.0,0.0,6.0,23.7,47.9,51,28598.75,45817.16
+2019-04-03,16.5,2.1,9.6,0.5,0.5,0.0,3.0,38.5,71.3,51,42786.98,45979.19
+2019-04-04,13.4,2.0,7.7,0.0,0.0,0.0,0.0,27.1,53.6,3,42945.48,46140.5
+2019-04-05,5.3,1.5,3.6,7.3,7.3,0.0,14.0,15.7,31.0,61,1656.07,46300.98
+2019-04-06,20.0,3.6,10.1,0.2,0.2,0.0,2.0,21.1,41.0,51,39557.13,46460.52
+2019-04-07,16.1,4.2,10.5,0.0,0.0,0.0,0.0,18.5,38.2,3,36196.37,46619.0
+2019-04-08,24.2,8.6,15.8,12.6,12.6,0.0,8.0,23.6,57.6,63,24721.92,46776.31
+2019-04-09,12.2,4.4,8.6,0.9,0.9,0.0,2.0,21.0,50.0,53,36397.97,46932.74
+2019-04-10,14.2,4.3,9.0,0.0,0.0,0.0,0.0,24.3,48.2,3,43200.0,47089.33
+2019-04-11,10.4,2.7,6.3,0.0,0.0,0.0,0.0,14.1,28.1,3,36565.94,47246.04
+2019-04-12,18.6,6.2,12.2,15.2,15.2,0.0,9.0,31.0,56.5,63,22271.96,47402.77
+2019-04-13,23.0,14.1,17.5,12.6,12.6,0.0,9.0,22.5,46.8,63,22158.37,47559.43
+2019-04-14,18.5,10.9,15.2,1.9,1.9,0.0,5.0,25.6,43.6,55,22183.88,47715.9
+2019-04-15,18.5,7.2,13.4,15.0,15.0,0.0,10.0,37.6,81.0,63,29649.22,47872.08
+2019-04-16,17.0,5.1,10.9,0.3,0.3,0.0,1.0,28.1,54.4,51,43705.63,48027.87
+2019-04-17,14.9,7.8,10.4,0.4,0.4,0.0,1.0,20.8,38.2,51,36000.0,48183.14
+2019-04-18,16.6,8.6,12.5,1.7,1.7,0.0,8.0,24.2,47.9,53,14400.0,48337.8
+2019-04-19,23.6,15.4,18.5,5.4,5.4,0.0,10.0,32.4,58.7,63,32775.27,48491.73
+2019-04-20,17.7,11.4,15.4,11.8,11.8,0.0,13.0,31.8,73.1,61,360.09,48644.81
+2019-04-21,16.9,9.9,13.3,0.3,0.3,0.0,2.0,20.9,41.0,51,35473.65,48796.92
+2019-04-22,17.0,9.5,13.4,8.1,8.1,0.0,15.0,22.9,43.9,61,14815.11,48947.95
+2019-04-23,22.8,9.8,15.8,0.2,0.2,0.0,2.0,21.9,42.5,51,43296.33,49097.76
+2019-04-24,19.5,10.1,16.0,0.0,0.0,0.0,0.0,20.9,43.9,3,44698.35,49246.24
+2019-04-25,16.2,7.5,11.4,1.9,1.9,0.0,6.0,17.0,36.0,53,32403.16,49393.25
+2019-04-26,16.6,10.2,13.4,18.9,18.9,0.0,21.0,28.9,58.3,61,5904.13,49538.66
+2019-04-27,12.1,7.4,10.1,3.1,3.1,0.0,4.0,38.2,72.7,61,35685.02,49682.34
+2019-04-28,10.8,5.8,8.5,1.9,1.9,0.0,6.0,18.9,36.0,55,9822.66,49824.16
+2019-04-29,13.1,3.1,8.5,0.9,0.9,0.0,2.0,18.3,36.7,53,38090.4,49964.36
+2019-04-30,18.9,8.6,12.1,3.7,3.7,0.0,6.0,15.0,31.7,55,13472.75,50103.94
+2019-05-01,12.4,8.8,10.6,1.7,1.7,0.0,6.0,18.8,38.2,53,88.17,50242.86
+2019-05-02,23.9,10.2,14.7,5.7,5.7,0.0,7.0,19.6,44.3,61,39169.56,50381.0
+2019-05-03,14.1,9.5,11.5,1.1,1.1,0.0,4.0,12.7,25.6,51,0.0,50518.27
+2019-05-04,20.6,10.8,14.9,9.6,9.6,0.0,11.0,8.3,19.1,61,19398.42,50654.52
+2019-05-05,13.3,10.9,12.0,32.3,32.3,0.0,24.0,24.5,44.6,63,0.0,50789.64
+2019-05-06,20.5,9.4,14.0,0.2,0.2,0.0,2.0,18.5,33.5,51,40686.59,50923.5
+2019-05-07,19.1,10.2,14.4,4.3,4.3,0.0,5.0,18.9,39.6,61,45192.72,51055.99
+2019-05-08,19.4,10.1,14.2,0.0,0.0,0.0,0.0,16.8,33.1,3,35886.93,51186.98
+2019-05-09,15.2,9.4,12.0,0.1,0.1,0.0,1.0,21.4,42.1,51,18233.76,51316.32
+2019-05-10,22.6,12.1,17.8,2.3,2.3,0.0,6.0,21.4,40.7,53,16369.52,51443.89
+2019-05-11,19.5,11.2,16.0,3.9,3.9,0.0,5.0,19.3,40.0,61,39146.99,51569.55
+2019-05-12,12.8,6.8,9.0,28.7,28.7,0.0,23.0,32.6,61.6,63,0.0,51693.18
+2019-05-13,10.9,6.8,8.5,17.7,17.7,0.0,14.0,20.5,41.0,63,3600.0,51814.6
+2019-05-14,11.9,6.9,9.0,5.1,5.1,0.0,12.0,16.4,30.2,53,1020.25,51933.69
+2019-05-15,19.4,6.1,13.0,0.5,0.5,0.0,3.0,22.3,43.6,51,45486.12,52050.3
+2019-05-16,20.6,9.6,15.7,0.8,0.8,0.0,6.0,15.4,36.4,51,48691.21,52164.29
+2019-05-17,24.4,11.9,18.8,0.2,0.2,0.0,2.0,17.7,35.6,51,39615.34,52275.49
+2019-05-18,22.9,11.2,16.7,0.0,0.0,0.0,0.0,16.2,28.8,3,48894.36,52383.75
+2019-05-19,22.8,12.3,18.2,2.4,2.4,0.0,4.0,25.6,49.0,55,47441.0,52489.26
+2019-05-20,28.0,18.7,23.1,1.7,1.7,0.0,5.0,26.5,61.9,53,47632.42,52592.85
+2019-05-21,19.8,12.4,16.0,0.0,0.0,0.0,0.0,26.3,50.4,3,49181.6,52694.46
+2019-05-22,22.2,10.4,16.7,0.0,0.0,0.0,0.0,16.2,32.4,3,46659.08,52794.0
+2019-05-23,24.9,14.1,18.8,0.8,0.8,0.0,3.0,23.0,43.9,53,37003.31,52891.37
+2019-05-24,23.2,14.6,19.6,0.3,0.3,0.0,1.0,32.8,64.1,51,47938.18,52986.44
+2019-05-25,19.5,12.2,15.8,0.0,0.0,0.0,0.0,26.2,50.4,3,49535.76,53079.13
+2019-05-26,29.0,15.1,21.8,3.0,3.0,0.0,5.0,20.9,39.6,61,43200.0,53169.33
+2019-05-27,25.5,15.6,21.0,0.0,0.0,0.0,0.0,15.9,32.4,3,49656.98,53256.91
+2019-05-28,20.8,15.9,18.4,6.9,6.9,0.0,9.0,30.8,57.6,61,15997.57,53341.79
+2019-05-29,21.0,13.8,17.9,2.3,2.3,0.0,6.0,20.1,37.4,53,19328.21,53423.84
+2019-05-30,20.4,13.2,16.8,14.4,14.4,0.0,9.0,17.0,42.8,63,13643.93,53502.95
+2019-05-31,25.0,17.1,21.1,0.4,0.4,0.0,4.0,14.6,29.2,51,49836.14,53579.02
+2019-06-01,25.9,15.6,19.9,0.0,0.0,0.0,0.0,17.4,37.4,3,49878.14,53651.92
+2019-06-02,25.9,16.2,20.8,1.8,1.8,0.0,5.0,20.1,46.1,55,42366.16,53721.55
+2019-06-03,20.8,11.9,16.7,0.0,0.0,0.0,0.0,25.8,50.4,2,49958.14,53787.8
+2019-06-04,21.4,10.9,16.3,0.0,0.0,0.0,0.0,18.7,37.1,3,49995.97,53850.55
+2019-06-05,26.2,16.2,21.0,6.4,6.4,0.0,5.0,21.2,42.8,63,29000.61,53909.7
+2019-06-06,27.2,18.9,23.1,8.8,8.8,0.0,4.0,19.3,35.6,63,50066.87,53965.12
+2019-06-07,25.5,16.7,20.5,0.0,0.0,0.0,0.0,16.9,33.8,3,45892.7,54016.7
+2019-06-08,25.9,15.3,20.3,0.0,0.0,0.0,0.0,21.4,42.1,3,50131.32,54064.52
+2019-06-09,23.7,13.6,18.5,0.0,0.0,0.0,0.0,22.1,43.9,3,50161.96,54108.94
+2019-06-10,20.5,13.6,17.6,15.8,15.8,0.0,18.0,23.9,43.2,61,0.0,54149.97
+2019-06-11,23.5,15.8,20.1,8.4,8.4,0.0,9.0,27.6,54.7,61,36240.15,54187.59
+2019-06-12,22.2,13.1,17.8,0.0,0.0,0.0,0.0,21.6,42.5,3,50247.64,54221.75
+2019-06-13,19.3,14.9,16.8,20.3,20.3,0.0,11.0,29.9,57.2,63,24408.88,54252.45
+2019-06-14,22.6,14.3,17.9,0.3,0.3,0.0,3.0,27.9,53.3,51,50298.79,54279.64
+2019-06-15,26.7,13.1,20.5,0.0,0.0,0.0,0.0,24.1,46.8,3,46391.4,54303.31
+2019-06-16,26.5,17.8,22.3,4.4,4.4,0.0,13.0,19.7,45.0,53,27321.03,54323.45
+2019-06-17,23.5,19.3,21.2,6.0,6.0,0.0,12.0,11.9,30.2,61,11254.65,54340.01
+2019-06-18,24.7,19.2,21.0,15.0,15.0,0.0,19.0,15.2,34.9,63,2926.9,54352.99
+2019-06-19,21.1,18.0,19.8,12.9,12.9,0.0,14.0,15.6,34.6,61,5531.7,54362.37
+2019-06-20,26.2,19.0,22.1,10.6,10.6,0.0,15.0,15.8,39.2,63,21182.92,54368.12
+2019-06-21,24.1,18.2,20.5,17.1,17.1,0.0,13.0,25.2,47.2,63,25238.35,54370.26
+2019-06-22,24.2,15.8,20.4,0.0,0.0,0.0,0.0,23.1,43.9,2,50400.0,54368.75
+2019-06-23,26.8,15.4,21.7,0.0,0.0,0.0,0.0,17.9,32.0,3,50400.0,54363.59
+2019-06-24,28.0,16.1,22.4,0.0,0.0,0.0,0.0,11.8,22.0,3,36018.93,54354.77
+2019-06-25,28.0,20.4,23.3,10.6,10.6,0.0,11.0,11.4,28.1,63,19588.62,54342.28
+2019-06-26,29.8,20.4,25.0,0.0,0.0,0.0,0.0,17.3,31.0,3,50400.0,54326.11

lr_attendance/prepare_for_modeling.py ADDED Viewed

	@@ -0,0 +1,215 @@

+import json
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+# Load the complete feature dataset
+df = pd.read_csv("attendance_features_complete.csv")
+print("=== FEATURE ENGINEERING ANALYSIS ===")
+print(f"Dataset shape: {df.shape}")
+print(f"Target variable: attendance_rate")
+# Target variable statistics
+print("\n=== TARGET VARIABLE ANALYSIS ===")
+print("Attendance Rate Statistics:")
+print(df["attendance_rate"].describe())
+# Create target categories for analysis
+df["attendance_category"] = pd.cut(
+    df["attendance_rate"],
+    bins=[0, 85, 92, 95, 100],
+    labels=["Poor", "Average", "Good", "Excellent"],
+)
+print("\nAttendance Categories:")
+print(df["attendance_category"].value_counts())
+# Feature list by category
+temporal_features = [
+    "day_of_week",
+    "month",
+    "quarter",
+    "week_of_year",
+    "day_of_month",
+    "day_of_year",
+    "is_weekend",
+    "is_school_day",
+    "is_month_start",
+    "is_month_end",
+    "is_friday",
+    "is_monday",
+    "school_year_progress",
+]
+holiday_features = ["is_holiday", "days_to_next_holiday", "days_since_last_holiday"]
+weather_features = [
+    "temp_max",
+    "temp_min",
+    "temp_mean",
+    "temp_range",
+    "precipitation_total",
+    "rain_total",
+    "snow_total",
+    "precipitation_hours",
+    "wind_speed_max",
+    "wind_gust_max",
+    "sunshine_duration",
+    "daylight_duration",
+    "is_rainy_day",
+    "is_snowy_day",
+    "is_windy_day",
+    "is_extreme_temp",
+    "weather_severity",
+]
+# School-level features
+school_features = ["School DBN"]
+# Target variable
+target = "attendance_rate"
+print(f"\n=== FEATURE CATEGORIES ===")
+print(f"Temporal features: {len(temporal_features)}")
+print(f"Holiday features: {len(holiday_features)}")
+print(f"Weather features: {len(weather_features)}")
+print(f"School features: {len(school_features)}")
+# Check for missing values
+print("\n=== MISSING VALUES ANALYSIS ===")
+all_features = temporal_features + holiday_features + weather_features
+missing_analysis = df[all_features + [target]].isnull().sum()
+print(missing_analysis[missing_analysis > 0])
+# Correlation analysis
+print("\n=== CORRELATION ANALYSIS ===")
+numeric_features = df[
+    temporal_features + holiday_features + weather_features + [target]
+].select_dtypes(include=[np.number])
+correlation_matrix = numeric_features.corr()
+# Top correlations with target
+target_correlations = correlation_matrix[target].abs().sort_values(ascending=False)
+print("Top 15 features correlated with attendance rate:")
+print(target_correlations.head(16)[1:])  # Exclude self-correlation
+# Feature importance for linear regression (high correlation features)
+high_corr_features = target_correlations[target_correlations > 0.1].index.tolist()
+print(f"\nFeatures with correlation > 0.1: {len(high_corr_features)}")
+print(high_corr_features)
+# Prepare data for modeling
+print("\n=== DATA PREPARATION FOR MODELING ===")
+# Handle missing values in weather features
+df_clean = df.copy()
+for feature in weather_features:
+    if df_clean[feature].isnull().sum() > 0:
+        # Fill with median for numeric features
+        df_clean[feature] = df_clean[feature].fillna(df_clean[feature].median())
+# Create interaction features
+df_clean["temp_humidity_interaction"] = (
+    df_clean["temp_mean"] * df_clean["precipitation_total"]
+)
+df_clean["wind_precip_interaction"] = (
+    df_clean["wind_speed_max"] * df_clean["precipitation_total"]
+)
+df_clean["holiday_weather_interaction"] = (
+    df_clean["is_holiday"] * df_clean["weather_severity"]
+)
+# Polynomial features for important continuous variables
+df_clean["temp_squared"] = df_clean["temp_mean"] ** 2
+df_clean["precipitation_squared"] = df_clean["precipitation_total"] ** 2
+# Encoding categorical features
+le = LabelEncoder()
+df_clean["season_encoded"] = le.fit_transform(df_clean["season"])
+# Final feature list
+final_features = (
+    temporal_features
+    + holiday_features
+    + weather_features
+    + [
+        "temp_humidity_interaction",
+        "wind_precip_interaction",
+        "holiday_weather_interaction",
+        "temp_squared",
+        "precipitation_squared",
+        "season_encoded",
+    ]
+)
+# Remove any remaining non-numeric or problematic features
+final_features = [
+    f for f in final_features if f in df_clean.columns and df_clean[f].dtype != "object"
+]
+print(f"Final feature count for modeling: {len(final_features)}")
+# Split data
+X = df_clean[final_features]
+y = df_clean[target]
+# Remove rows with missing target
+mask = ~y.isnull()
+X = X[mask]
+y = y[mask]
+print(f"Final dataset shape for modeling: {X.shape}")
+# Train-test split
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, random_state=42
+)
+print(f"Training set: {X_train.shape}")
+print(f"Test set: {X_test.shape}")
+# Feature scaling
+scaler = StandardScaler()
+X_train_scaled = scaler.fit_transform(X_train)
+X_test_scaled = scaler.transform(X_test)
+# Save prepared datasets
+train_data = pd.DataFrame(X_train_scaled, columns=final_features)
+train_data["attendance_rate"] = y_train.values
+test_data = pd.DataFrame(X_test_scaled, columns=final_features)
+test_data["attendance_rate"] = y_test.values
+train_data.to_csv("train_data_scaled.csv", index=False)
+test_data.to_csv("test_data_scaled.csv", index=False)
+# Save feature information
+feature_info = {
+    "final_features": final_features,
+    "temporal_features": temporal_features,
+    "holiday_features": holiday_features,
+    "weather_features": weather_features,
+    "target_correlations": target_correlations.to_dict(),
+}
+import json
+with open("feature_info.json", "w") as f:
+    json.dump(feature_info, f, indent=2)
+print("\n=== DATASETS SAVED ===")
+print("v train_data_scaled.csv - Training data with scaled features")
+print("v test_data_scaled.csv - Test data with scaled features")
+print("v feature_info.json - Feature metadata and correlations")
+print(f"\n=== FEATURE ENGINEERING SUMMARY ===")
+print(f"v Enhanced date column with {len(temporal_features)} temporal features")
+print(f"v Added {len(holiday_features)} holiday-related features")
+print(f"v Integrated {len(weather_features)} weather features")
+print(f"v Created interaction and polynomial features")
+print(f"v Final dataset ready for multiple linear regression")
+print(f"v Average attendance rate: {df['attendance_rate'].mean():.2f}%")
+print(f"v Features most correlated with attendance: {high_corr_features[:5]}")

lr_attendance/train.ipynb ADDED Viewed

	@@ -0,0 +1,1140 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "ab55b6f5-72a8-42bb-ae90-53ec5bb79501",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "School DBN    0\n",
+      "Date          0\n",
+      "Enrolled      0\n",
+      "Absent        0\n",
+      "Present       0\n",
+      "Released      0\n",
+      "dtype: int64\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Date</th>\n",
+       "      <th>Enrolled</th>\n",
+       "      <th>Absent</th>\n",
+       "      <th>Present</th>\n",
+       "      <th>Released</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>2.771530e+05</td>\n",
+       "      <td>277153.00000</td>\n",
+       "      <td>277153.000000</td>\n",
+       "      <td>277153.000000</td>\n",
+       "      <td>277153.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>2.018665e+07</td>\n",
+       "      <td>596.98617</td>\n",
+       "      <td>50.503538</td>\n",
+       "      <td>544.499403</td>\n",
+       "      <td>1.983229</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>4.555413e+03</td>\n",
+       "      <td>482.90966</td>\n",
+       "      <td>54.329671</td>\n",
+       "      <td>452.970313</td>\n",
+       "      <td>35.114511</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>2.018090e+07</td>\n",
+       "      <td>1.00000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>2.018111e+07</td>\n",
+       "      <td>329.00000</td>\n",
+       "      <td>23.000000</td>\n",
+       "      <td>291.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>2.019013e+07</td>\n",
+       "      <td>476.00000</td>\n",
+       "      <td>38.000000</td>\n",
+       "      <td>430.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>2.019041e+07</td>\n",
+       "      <td>684.00000</td>\n",
+       "      <td>59.000000</td>\n",
+       "      <td>640.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>2.019063e+07</td>\n",
+       "      <td>5955.00000</td>\n",
+       "      <td>2151.000000</td>\n",
+       "      <td>5847.000000</td>\n",
+       "      <td>5904.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               Date      Enrolled         Absent        Present       Released\n",
+       "count  2.771530e+05  277153.00000  277153.000000  277153.000000  277153.000000\n",
+       "mean   2.018665e+07     596.98617      50.503538     544.499403       1.983229\n",
+       "std    4.555413e+03     482.90966      54.329671     452.970313      35.114511\n",
+       "min    2.018090e+07       1.00000       0.000000       1.000000       0.000000\n",
+       "25%    2.018111e+07     329.00000      23.000000     291.000000       0.000000\n",
+       "50%    2.019013e+07     476.00000      38.000000     430.000000       0.000000\n",
+       "75%    2.019041e+07     684.00000      59.000000     640.000000       0.000000\n",
+       "max    2.019063e+07    5955.00000    2151.000000    5847.000000    5904.000000"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# import lib\n",
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import mean_absolute_error ,r2_score\n",
+    "from sklearn.linear_model import Ridge\n",
+    "import matplotlib.pyplot as plt\n",
+    "# Load the data, then check for missing values and summary statistics\n",
+    "\n",
+    "df = pd.read_csv(\"DailyPresence.csv\")\n",
+    "print(df.isnull().sum())\n",
+    "df.describe()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "b403a040-92fc-440e-a0fc-4d7d5395c2f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the feature (Enrolled) and target (Present), then split into train/test sets\n",
+    "x = df[['Enrolled']]\n",
+    "y = df['Present']\n",
+    "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "240452cf-351a-48e1-8f65-9052c8700094",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-4 {\n",
+       "  /* Definition of color scheme common for light and dark mode */\n",
+       "  --sklearn-color-text: #000;\n",
+       "  --sklearn-color-text-muted: #666;\n",
+       "  --sklearn-color-line: gray;\n",
+       "  /* Definition of color scheme for unfitted estimators */\n",
+       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
+       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
+       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
+       "  --sklearn-color-unfitted-level-3: chocolate;\n",
+       "  /* Definition of color scheme for fitted estimators */\n",
+       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
+       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
+       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
+       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4.light {\n",
+       "  /* Specific color for light theme */\n",
+       "  --sklearn-color-text-on-default-background: black;\n",
+       "  --sklearn-color-background: white;\n",
+       "  --sklearn-color-border-box: black;\n",
+       "  --sklearn-color-icon: #696969;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4.dark {\n",
+       "  --sklearn-color-text-on-default-background: white;\n",
+       "  --sklearn-color-background: #111;\n",
+       "  --sklearn-color-border-box: white;\n",
+       "  --sklearn-color-icon: #878787;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 pre {\n",
+       "  padding: 0;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 input.sk-hidden--visually {\n",
+       "  border: 0;\n",
+       "  clip: rect(1px 1px 1px 1px);\n",
+       "  clip: rect(1px, 1px, 1px, 1px);\n",
+       "  height: 1px;\n",
+       "  margin: -1px;\n",
+       "  overflow: hidden;\n",
+       "  padding: 0;\n",
+       "  position: absolute;\n",
+       "  width: 1px;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-dashed-wrapped {\n",
+       "  border: 1px dashed var(--sklearn-color-line);\n",
+       "  margin: 0 0.4em 0.5em 0.4em;\n",
+       "  box-sizing: border-box;\n",
+       "  padding-bottom: 0.4em;\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-container {\n",
+       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
+       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
+       "     so we also need the `!important` here to be able to override the\n",
+       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
+       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
+       "  display: inline-block !important;\n",
+       "  position: relative;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-text-repr-fallback {\n",
+       "  display: none;\n",
+       "}\n",
+       "\n",
+       "div.sk-parallel-item,\n",
+       "div.sk-serial,\n",
+       "div.sk-item {\n",
+       "  /* draw centered vertical line to link estimators */\n",
+       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
+       "  background-size: 2px 100%;\n",
+       "  background-repeat: no-repeat;\n",
+       "  background-position: center center;\n",
+       "}\n",
+       "\n",
+       "/* Parallel-specific style estimator block */\n",
+       "\n",
+       "#sk-container-id-4 div.sk-parallel-item::after {\n",
+       "  content: \"\";\n",
+       "  width: 100%;\n",
+       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
+       "  flex-grow: 1;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-parallel {\n",
+       "  display: flex;\n",
+       "  align-items: stretch;\n",
+       "  justify-content: center;\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "  position: relative;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-parallel-item {\n",
+       "  display: flex;\n",
+       "  flex-direction: column;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-parallel-item:first-child::after {\n",
+       "  align-self: flex-end;\n",
+       "  width: 50%;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-parallel-item:last-child::after {\n",
+       "  align-self: flex-start;\n",
+       "  width: 50%;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-parallel-item:only-child::after {\n",
+       "  width: 0;\n",
+       "}\n",
+       "\n",
+       "/* Serial-specific style estimator block */\n",
+       "\n",
+       "#sk-container-id-4 div.sk-serial {\n",
+       "  display: flex;\n",
+       "  flex-direction: column;\n",
+       "  align-items: center;\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "  padding-right: 1em;\n",
+       "  padding-left: 1em;\n",
+       "}\n",
+       "\n",
+       "\n",
+       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
+       "clickable and can be expanded/collapsed.\n",
+       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
+       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
+       "*/\n",
+       "\n",
+       "/* Pipeline and ColumnTransformer style (default) */\n",
+       "\n",
+       "#sk-container-id-4 div.sk-toggleable {\n",
+       "  /* Default theme specific background. It is overwritten whether we have a\n",
+       "  specific estimator or a Pipeline/ColumnTransformer */\n",
+       "  background-color: var(--sklearn-color-background);\n",
+       "}\n",
+       "\n",
+       "/* Toggleable label */\n",
+       "#sk-container-id-4 label.sk-toggleable__label {\n",
+       "  cursor: pointer;\n",
+       "  display: flex;\n",
+       "  width: 100%;\n",
+       "  margin-bottom: 0;\n",
+       "  padding: 0.5em;\n",
+       "  box-sizing: border-box;\n",
+       "  text-align: center;\n",
+       "  align-items: center;\n",
+       "  justify-content: center;\n",
+       "  gap: 0.5em;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 label.sk-toggleable__label .caption {\n",
+       "  font-size: 0.6rem;\n",
+       "  font-weight: lighter;\n",
+       "  color: var(--sklearn-color-text-muted);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 label.sk-toggleable__label-arrow:before {\n",
+       "  /* Arrow on the left of the label */\n",
+       "  content: \"▸\";\n",
+       "  float: left;\n",
+       "  margin-right: 0.25em;\n",
+       "  color: var(--sklearn-color-icon);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 label.sk-toggleable__label-arrow:hover:before {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "}\n",
+       "\n",
+       "/* Toggleable content - dropdown */\n",
+       "\n",
+       "#sk-container-id-4 div.sk-toggleable__content {\n",
+       "  display: none;\n",
+       "  text-align: left;\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-toggleable__content.fitted {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-toggleable__content pre {\n",
+       "  margin: 0.2em;\n",
+       "  border-radius: 0.25em;\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-toggleable__content.fitted pre {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
+       "  /* Expand drop-down */\n",
+       "  display: block;\n",
+       "  width: 100%;\n",
+       "  overflow: visible;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
+       "  content: \"▾\";\n",
+       "}\n",
+       "\n",
+       "/* Pipeline/ColumnTransformer-specific style */\n",
+       "\n",
+       "#sk-container-id-4 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+       "  background-color: var(--sklearn-color-fitted-level-2);\n",
+       "}\n",
+       "\n",
+       "/* Estimator-specific style */\n",
+       "\n",
+       "/* Colorize estimator box */\n",
+       "#sk-container-id-4 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-2);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-label label.sk-toggleable__label,\n",
+       "#sk-container-id-4 div.sk-label label {\n",
+       "  /* The background is the default theme color */\n",
+       "  color: var(--sklearn-color-text-on-default-background);\n",
+       "}\n",
+       "\n",
+       "/* On hover, darken the color of the background */\n",
+       "#sk-container-id-4 div.sk-label:hover label.sk-toggleable__label {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+       "}\n",
+       "\n",
+       "/* Label box, darken color on hover, fitted */\n",
+       "#sk-container-id-4 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  background-color: var(--sklearn-color-fitted-level-2);\n",
+       "}\n",
+       "\n",
+       "/* Estimator label */\n",
+       "\n",
+       "#sk-container-id-4 div.sk-label label {\n",
+       "  font-family: monospace;\n",
+       "  font-weight: bold;\n",
+       "  line-height: 1.2em;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-label-container {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "\n",
+       "/* Estimator-specific */\n",
+       "#sk-container-id-4 div.sk-estimator {\n",
+       "  font-family: monospace;\n",
+       "  border: 1px dotted var(--sklearn-color-border-box);\n",
+       "  border-radius: 0.25em;\n",
+       "  box-sizing: border-box;\n",
+       "  margin-bottom: 0.5em;\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-estimator.fitted {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-0);\n",
+       "}\n",
+       "\n",
+       "/* on hover */\n",
+       "#sk-container-id-4 div.sk-estimator:hover {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 div.sk-estimator.fitted:hover {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-2);\n",
+       "}\n",
+       "\n",
+       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
+       "\n",
+       "/* Common style for \"i\" and \"?\" */\n",
+       "\n",
+       ".sk-estimator-doc-link,\n",
+       "a:link.sk-estimator-doc-link,\n",
+       "a:visited.sk-estimator-doc-link {\n",
+       "  float: right;\n",
+       "  font-size: smaller;\n",
+       "  line-height: 1em;\n",
+       "  font-family: monospace;\n",
+       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+       "  border-radius: 1em;\n",
+       "  height: 1em;\n",
+       "  width: 1em;\n",
+       "  text-decoration: none !important;\n",
+       "  margin-left: 0.5em;\n",
+       "  text-align: center;\n",
+       "  /* unfitted */\n",
+       "  border: var(--sklearn-color-unfitted-level-3) 1pt solid;\n",
+       "  color: var(--sklearn-color-unfitted-level-3);\n",
+       "}\n",
+       "\n",
+       ".sk-estimator-doc-link.fitted,\n",
+       "a:link.sk-estimator-doc-link.fitted,\n",
+       "a:visited.sk-estimator-doc-link.fitted {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-0);\n",
+       "  border: var(--sklearn-color-fitted-level-3) 1pt solid;\n",
+       "  color: var(--sklearn-color-fitted-level-3);\n",
+       "}\n",
+       "\n",
+       "/* On hover */\n",
+       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
+       ".sk-estimator-doc-link:hover,\n",
+       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
+       ".sk-estimator-doc-link:hover {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
+       "  border: var(--sklearn-color-fitted-level-0) 1pt solid;\n",
+       "  color: var(--sklearn-color-unfitted-level-0);\n",
+       "  text-decoration: none;\n",
+       "}\n",
+       "\n",
+       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
+       ".sk-estimator-doc-link.fitted:hover,\n",
+       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
+       ".sk-estimator-doc-link.fitted:hover {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-3);\n",
+       "  border: var(--sklearn-color-fitted-level-0) 1pt solid;\n",
+       "  color: var(--sklearn-color-fitted-level-0);\n",
+       "  text-decoration: none;\n",
+       "}\n",
+       "\n",
+       "/* Span, style for the box shown on hovering the info icon */\n",
+       ".sk-estimator-doc-link span {\n",
+       "  display: none;\n",
+       "  z-index: 9999;\n",
+       "  position: relative;\n",
+       "  font-weight: normal;\n",
+       "  right: .2ex;\n",
+       "  padding: .5ex;\n",
+       "  margin: .5ex;\n",
+       "  width: min-content;\n",
+       "  min-width: 20ex;\n",
+       "  max-width: 50ex;\n",
+       "  color: var(--sklearn-color-text);\n",
+       "  box-shadow: 2pt 2pt 4pt #999;\n",
+       "  /* unfitted */\n",
+       "  background: var(--sklearn-color-unfitted-level-0);\n",
+       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
+       "}\n",
+       "\n",
+       ".sk-estimator-doc-link.fitted span {\n",
+       "  /* fitted */\n",
+       "  background: var(--sklearn-color-fitted-level-0);\n",
+       "  border: var(--sklearn-color-fitted-level-3);\n",
+       "}\n",
+       "\n",
+       ".sk-estimator-doc-link:hover span {\n",
+       "  display: block;\n",
+       "}\n",
+       "\n",
+       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
+       "\n",
+       "#sk-container-id-4 a.estimator_doc_link {\n",
+       "  float: right;\n",
+       "  font-size: 1rem;\n",
+       "  line-height: 1em;\n",
+       "  font-family: monospace;\n",
+       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
+       "  border-radius: 1rem;\n",
+       "  height: 1rem;\n",
+       "  width: 1rem;\n",
+       "  text-decoration: none;\n",
+       "  /* unfitted */\n",
+       "  color: var(--sklearn-color-unfitted-level-1);\n",
+       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 a.estimator_doc_link.fitted {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-0);\n",
+       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
+       "  color: var(--sklearn-color-fitted-level-1);\n",
+       "}\n",
+       "\n",
+       "/* On hover */\n",
+       "#sk-container-id-4 a.estimator_doc_link:hover {\n",
+       "  /* unfitted */\n",
+       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
+       "  color: var(--sklearn-color-background);\n",
+       "  text-decoration: none;\n",
+       "}\n",
+       "\n",
+       "#sk-container-id-4 a.estimator_doc_link.fitted:hover {\n",
+       "  /* fitted */\n",
+       "  background-color: var(--sklearn-color-fitted-level-3);\n",
+       "}\n",
+       "\n",
+       ".estimator-table {\n",
+       "    font-family: monospace;\n",
+       "}\n",
+       "\n",
+       ".estimator-table summary {\n",
+       "    padding: .5rem;\n",
+       "    cursor: pointer;\n",
+       "}\n",
+       "\n",
+       ".estimator-table summary::marker {\n",
+       "    font-size: 0.7rem;\n",
+       "}\n",
+       "\n",
+       ".estimator-table details[open] {\n",
+       "    padding-left: 0.1rem;\n",
+       "    padding-right: 0.1rem;\n",
+       "    padding-bottom: 0.3rem;\n",
+       "}\n",
+       "\n",
+       ".estimator-table .parameters-table {\n",
+       "    margin-left: auto !important;\n",
+       "    margin-right: auto !important;\n",
+       "    margin-top: 0;\n",
+       "}\n",
+       "\n",
+       ".estimator-table .parameters-table tr:nth-child(odd) {\n",
+       "    background-color: #fff;\n",
+       "}\n",
+       "\n",
+       ".estimator-table .parameters-table tr:nth-child(even) {\n",
+       "    background-color: #f6f6f6;\n",
+       "}\n",
+       "\n",
+       ".estimator-table .parameters-table tr:hover {\n",
+       "    background-color: #e0e0e0;\n",
+       "}\n",
+       "\n",
+       ".estimator-table table td {\n",
+       "    border: 1px solid rgba(106, 105, 104, 0.232);\n",
+       "}\n",
+       "\n",
+       "/*\n",
+       "    `table td`is set in notebook with right text-align.\n",
+       "    We need to overwrite it.\n",
+       "*/\n",
+       ".estimator-table table td.param {\n",
+       "    text-align: left;\n",
+       "    position: relative;\n",
+       "    padding: 0;\n",
+       "}\n",
+       "\n",
+       ".user-set td {\n",
+       "    color:rgb(255, 94, 0);\n",
+       "    text-align: left !important;\n",
+       "}\n",
+       "\n",
+       ".user-set td.value {\n",
+       "    color:rgb(255, 94, 0);\n",
+       "    background-color: transparent;\n",
+       "}\n",
+       "\n",
+       ".default td {\n",
+       "    color: black;\n",
+       "    text-align: left !important;\n",
+       "}\n",
+       "\n",
+       ".user-set td i,\n",
+       ".default td i {\n",
+       "    color: black;\n",
+       "}\n",
+       "\n",
+       "/*\n",
+       "    Styles for parameter documentation links\n",
+       "    We need styling for visited so jupyter doesn't overwrite it\n",
+       "*/\n",
+       "a.param-doc-link,\n",
+       "a.param-doc-link:link,\n",
+       "a.param-doc-link:visited {\n",
+       "    text-decoration: underline dashed;\n",
+       "    text-underline-offset: .3em;\n",
+       "    color: inherit;\n",
+       "    display: block;\n",
+       "    padding: .5em;\n",
+       "}\n",
+       "\n",
+       "/* \"hack\" to make the entire area of the cell containing the link clickable */\n",
+       "a.param-doc-link::before {\n",
+       "    position: absolute;\n",
+       "    content: \"\";\n",
+       "    inset: 0;\n",
+       "}\n",
+       "\n",
+       ".param-doc-description {\n",
+       "    display: none;\n",
+       "    position: absolute;\n",
+       "    z-index: 9999;\n",
+       "    left: 0;\n",
+       "    padding: .5ex;\n",
+       "    margin-left: 1.5em;\n",
+       "    color: var(--sklearn-color-text);\n",
+       "    box-shadow: .3em .3em .4em #999;\n",
+       "    width: max-content;\n",
+       "    text-align: left;\n",
+       "    max-height: 10em;\n",
+       "    overflow-y: auto;\n",
+       "\n",
+       "    /* unfitted */\n",
+       "    background: var(--sklearn-color-unfitted-level-0);\n",
+       "    border: thin solid var(--sklearn-color-unfitted-level-3);\n",
+       "}\n",
+       "\n",
+       "/* Fitted state for parameter tooltips */\n",
+       ".fitted .param-doc-description {\n",
+       "    /* fitted */\n",
+       "    background: var(--sklearn-color-fitted-level-0);\n",
+       "    border: thin solid var(--sklearn-color-fitted-level-3);\n",
+       "}\n",
+       "\n",
+       ".param-doc-link:hover .param-doc-description {\n",
+       "    display: block;\n",
+       "}\n",
+       "\n",
+       ".copy-paste-icon {\n",
+       "    background-image: url(data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA0NDggNTEyIj48IS0tIUZvbnQgQXdlc29tZSBGcmVlIDYuNy4yIGJ5IEBmb250YXdlc29tZSAtIGh0dHBzOi8vZm9udGF3ZXNvbWUuY29tIExpY2Vuc2UgLSBodHRwczovL2ZvbnRhd2Vzb21lLmNvbS9saWNlbnNlL2ZyZWUgQ29weXJpZ2h0IDIwMjUgRm9udGljb25zLCBJbmMuLS0+PHBhdGggZD0iTTIwOCAwTDMzMi4xIDBjMTIuNyAwIDI0LjkgNS4xIDMzLjkgMTQuMWw2Ny45IDY3LjljOSA5IDE0LjEgMjEuMiAxNC4xIDMzLjlMNDQ4IDMzNmMwIDI2LjUtMjEuNSA0OC00OCA0OGwtMTkyIDBjLTI2LjUgMC00OC0yMS41LTQ4LTQ4bDAtMjg4YzAtMjYuNSAyMS41LTQ4IDQ4LTQ4ek00OCAxMjhsODAgMCAwIDY0LTY0IDAgMCAyNTYgMTkyIDAgMC0zMiA2NCAwIDAgNDhjMCAyNi41LTIxLjUgNDgtNDggNDhMNDggNTEyYy0yNi41IDAtNDgtMjEuNS00OC00OEwwIDE3NmMwLTI2LjUgMjEuNS00OCA0OC00OHoiLz48L3N2Zz4=);\n",
+       "    background-repeat: no-repeat;\n",
+       "    background-size: 14px 14px;\n",
+       "    background-position: 0;\n",
+       "    display: inline-block;\n",
+       "    width: 14px;\n",
+       "    height: 14px;\n",
+       "    cursor: pointer;\n",
+       "}\n",
+       "</style><body><div id=\"sk-container-id-4\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Ridge(alpha=2.0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow\"><div><div>Ridge</div></div><div><a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html\">?<span>Documentation for Ridge</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></div></label><div class=\"sk-toggleable__content fitted\" data-param-prefix=\"\">\n",
+       "        <div class=\"estimator-table\">\n",
+       "            <details>\n",
+       "                <summary>Parameters</summary>\n",
+       "                <table class=\"parameters-table\">\n",
+       "                  <tbody>\n",
+       "                    \n",
+       "        <tr class=\"user-set\">\n",
+       "            <td><i class=\"copy-paste-icon\"\n",
+       "                 onclick=\"copyToClipboard('alpha',\n",
+       "                          this.parentElement.nextElementSibling)\"\n",
+       "            ></i></td>\n",
+       "            <td class=\"param\">\n",
+       "        <a class=\"param-doc-link\"\n",
+       "            rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=alpha,-%7Bfloat%2C%20ndarray%20of%20shape%20%28n_targets%2C%29%7D%2C%20default%3D1.0\">\n",
+       "            alpha\n",
+       "            <span class=\"param-doc-description\">alpha: {float, ndarray of shape (n_targets,)}, default=1.0<br><br>Constant that multiplies the L2 term, controlling regularization<br>strength. `alpha` must be a non-negative float i.e. in `[0, inf)`.<br><br>When `alpha = 0`, the objective is equivalent to ordinary least<br>squares, solved by the :class:`LinearRegression` object. For numerical<br>reasons, using `alpha = 0` with the `Ridge` object is not advised.<br>Instead, you should use the :class:`LinearRegression` object.<br><br>If an array is passed, penalties are assumed to be specific to the<br>targets. Hence they must correspond in number.</span>\n",
+       "        </a>\n",
+       "    </td>\n",
+       "            <td class=\"value\">2.0</td>\n",
+       "        </tr>\n",
+       "    \n",
+       "\n",
+       "        <tr class=\"default\">\n",
+       "            <td><i class=\"copy-paste-icon\"\n",
+       "                 onclick=\"copyToClipboard('fit_intercept',\n",
+       "                          this.parentElement.nextElementSibling)\"\n",
+       "            ></i></td>\n",
+       "            <td class=\"param\">\n",
+       "        <a class=\"param-doc-link\"\n",
+       "            rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=fit_intercept,-bool%2C%20default%3DTrue\">\n",
+       "            fit_intercept\n",
+       "            <span class=\"param-doc-description\">fit_intercept: bool, default=True<br><br>Whether to fit the intercept for this model. If set<br>to false, no intercept will be used in calculations<br>(i.e. ``X`` and ``y`` are expected to be centered).</span>\n",
+       "        </a>\n",
+       "    </td>\n",
+       "            <td class=\"value\">True</td>\n",
+       "        </tr>\n",
+       "    \n",
+       "\n",
+       "        <tr class=\"default\">\n",
+       "            <td><i class=\"copy-paste-icon\"\n",
+       "                 onclick=\"copyToClipboard('copy_X',\n",
+       "                          this.parentElement.nextElementSibling)\"\n",
+       "            ></i></td>\n",
+       "            <td class=\"param\">\n",
+       "        <a class=\"param-doc-link\"\n",
+       "            rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=copy_X,-bool%2C%20default%3DTrue\">\n",
+       "            copy_X\n",
+       "            <span class=\"param-doc-description\">copy_X: bool, default=True<br><br>If True, X will be copied; else, it may be overwritten.</span>\n",
+       "        </a>\n",
+       "    </td>\n",
+       "            <td class=\"value\">True</td>\n",
+       "        </tr>\n",
+       "    \n",
+       "\n",
+       "        <tr class=\"default\">\n",
+       "            <td><i class=\"copy-paste-icon\"\n",
+       "                 onclick=\"copyToClipboard('max_iter',\n",
+       "                          this.parentElement.nextElementSibling)\"\n",
+       "            ></i></td>\n",
+       "            <td class=\"param\">\n",
+       "        <a class=\"param-doc-link\"\n",
+       "            rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=max_iter,-int%2C%20default%3DNone\">\n",
+       "            max_iter\n",
+       "            <span class=\"param-doc-description\">max_iter: int, default=None<br><br>Maximum number of iterations for conjugate gradient solver.<br>For 'sparse_cg' and 'lsqr' solvers, the default value is determined<br>by scipy.sparse.linalg. For 'sag' solver, the default value is 1000.<br>For 'lbfgs' solver, the default value is 15000.</span>\n",
+       "        </a>\n",
+       "    </td>\n",
+       "            <td class=\"value\">None</td>\n",
+       "        </tr>\n",
+       "    \n",
+       "\n",
+       "        <tr class=\"default\">\n",
+       "            <td><i class=\"copy-paste-icon\"\n",
+       "                 onclick=\"copyToClipboard('tol',\n",
+       "                          this.parentElement.nextElementSibling)\"\n",
+       "            ></i></td>\n",
+       "            <td class=\"param\">\n",
+       "        <a class=\"param-doc-link\"\n",
+       "            rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=tol,-float%2C%20default%3D1e-4\">\n",
+       "            tol\n",
+       "            <span class=\"param-doc-description\">tol: float, default=1e-4<br><br>The precision of the solution (`coef_`) is determined by `tol` which<br>specifies a different convergence criterion for each solver:<br><br>- 'svd': `tol` has no impact.<br><br>- 'cholesky': `tol` has no impact.<br><br>- 'sparse_cg': norm of residuals smaller than `tol`.<br><br>- 'lsqr': `tol` is set as atol and btol of scipy.sparse.linalg.lsqr,<br>  which control the norm of the residual vector in terms of the norms of<br>  matrix and coefficients.<br><br>- 'sag' and 'saga': relative change of coef smaller than `tol`.<br><br>- 'lbfgs': maximum of the absolute (projected) gradient=max|residuals|<br>  smaller than `tol`.<br><br>.. versionchanged:: 1.2<br>   Default value changed from 1e-3 to 1e-4 for consistency with other linear<br>   models.</span>\n",
+       "        </a>\n",
+       "    </td>\n",
+       "            <td class=\"value\">0.0001</td>\n",
+       "        </tr>\n",
+       "    \n",
+       "\n",
+       "        <tr class=\"default\">\n",
+       "            <td><i class=\"copy-paste-icon\"\n",
+       "                 onclick=\"copyToClipboard('solver',\n",
+       "                          this.parentElement.nextElementSibling)\"\n",
+       "            ></i></td>\n",
+       "            <td class=\"param\">\n",
+       "        <a class=\"param-doc-link\"\n",
+       "            rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=solver,-%7B%27auto%27%2C%20%27svd%27%2C%20%27cholesky%27%2C%20%27lsqr%27%2C%20%27sparse_cg%27%2C%20%20%20%20%20%20%20%20%20%20%20%20%20%27sag%27%2C%20%27saga%27%2C%20%27lbfgs%27%7D%2C%20default%3D%27auto%27\">\n",
+       "            solver\n",
+       "            <span class=\"param-doc-description\">solver: {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg',             'sag', 'saga', 'lbfgs'}, default='auto'<br><br>Solver to use in the computational routines:<br><br>- 'auto' chooses the solver automatically based on the type of data.<br><br>- 'svd' uses a Singular Value Decomposition of X to compute the Ridge<br>  coefficients. It is the most stable solver, in particular more stable<br>  for singular matrices than 'cholesky' at the cost of being slower.<br><br>- 'cholesky' uses the standard :func:`scipy.linalg.solve` function to<br>  obtain a closed-form solution.<br><br>- 'sparse_cg' uses the conjugate gradient solver as found in<br>  :func:`scipy.sparse.linalg.cg`. As an iterative algorithm, this solver is<br>  more appropriate than 'cholesky' for large-scale data<br>  (possibility to set `tol` and `max_iter`).<br><br>- 'lsqr' uses the dedicated regularized least-squares routine<br>  :func:`scipy.sparse.linalg.lsqr`. It is the fastest and uses an iterative<br>  procedure.<br><br>- 'sag' uses a Stochastic Average Gradient descent, and 'saga' uses<br>  its improved, unbiased version named SAGA. Both methods also use an<br>  iterative procedure, and are often faster than other solvers when<br>  both n_samples and n_features are large. Note that 'sag' and<br>  'saga' fast convergence is only guaranteed on features with<br>  approximately the same scale. You can preprocess the data with a<br>  scaler from :mod:`sklearn.preprocessing`.<br><br>- 'lbfgs' uses L-BFGS-B algorithm implemented in<br>  :func:`scipy.optimize.minimize`. It can be used only when `positive`<br>  is True.<br><br>All solvers except 'svd' support both dense and sparse data. However, only<br>'lsqr', 'sag', 'sparse_cg', and 'lbfgs' support sparse input when<br>`fit_intercept` is True.<br><br>.. versionadded:: 0.17<br>   Stochastic Average Gradient descent solver.<br>.. versionadded:: 0.19<br>   SAGA solver.</span>\n",
+       "        </a>\n",
+       "    </td>\n",
+       "            <td class=\"value\">&#x27;auto&#x27;</td>\n",
+       "        </tr>\n",
+       "    \n",
+       "\n",
+       "        <tr class=\"default\">\n",
+       "            <td><i class=\"copy-paste-icon\"\n",
+       "                 onclick=\"copyToClipboard('positive',\n",
+       "                          this.parentElement.nextElementSibling)\"\n",
+       "            ></i></td>\n",
+       "            <td class=\"param\">\n",
+       "        <a class=\"param-doc-link\"\n",
+       "            rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=positive,-bool%2C%20default%3DFalse\">\n",
+       "            positive\n",
+       "            <span class=\"param-doc-description\">positive: bool, default=False<br><br>When set to ``True``, forces the coefficients to be positive.<br>Only 'lbfgs' solver is supported in this case.</span>\n",
+       "        </a>\n",
+       "    </td>\n",
+       "            <td class=\"value\">False</td>\n",
+       "        </tr>\n",
+       "    \n",
+       "\n",
+       "        <tr class=\"default\">\n",
+       "            <td><i class=\"copy-paste-icon\"\n",
+       "                 onclick=\"copyToClipboard('random_state',\n",
+       "                          this.parentElement.nextElementSibling)\"\n",
+       "            ></i></td>\n",
+       "            <td class=\"param\">\n",
+       "        <a class=\"param-doc-link\"\n",
+       "            rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.8/modules/generated/sklearn.linear_model.Ridge.html#:~:text=random_state,-int%2C%20RandomState%20instance%2C%20default%3DNone\">\n",
+       "            random_state\n",
+       "            <span class=\"param-doc-description\">random_state: int, RandomState instance, default=None<br><br>Used when ``solver`` == 'sag' or 'saga' to shuffle the data.<br>See :term:`Glossary <random_state>` for details.<br><br>.. versionadded:: 0.17<br>   `random_state` to support Stochastic Average Gradient.</span>\n",
+       "        </a>\n",
+       "    </td>\n",
+       "            <td class=\"value\">None</td>\n",
+       "        </tr>\n",
+       "    \n",
+       "                  </tbody>\n",
+       "                </table>\n",
+       "            </details>\n",
+       "        </div>\n",
+       "    </div></div></div></div></div><script>function copyToClipboard(text, element) {\n",
+       "    // Get the parameter prefix from the closest toggleable content\n",
+       "    const toggleableContent = element.closest('.sk-toggleable__content');\n",
+       "    const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';\n",
+       "    const fullParamName = paramPrefix ? `${paramPrefix}${text}` : text;\n",
+       "\n",
+       "    const originalStyle = element.style;\n",
+       "    const computedStyle = window.getComputedStyle(element);\n",
+       "    const originalWidth = computedStyle.width;\n",
+       "    const originalHTML = element.innerHTML.replace('Copied!', '');\n",
+       "\n",
+       "    navigator.clipboard.writeText(fullParamName)\n",
+       "        .then(() => {\n",
+       "            element.style.width = originalWidth;\n",
+       "            element.style.color = 'green';\n",
+       "            element.innerHTML = \"Copied!\";\n",
+       "\n",
+       "            setTimeout(() => {\n",
+       "                element.innerHTML = originalHTML;\n",
+       "                element.style = originalStyle;\n",
+       "            }, 2000);\n",
+       "        })\n",
+       "        .catch(err => {\n",
+       "            console.error('Failed to copy:', err);\n",
+       "            element.style.color = 'red';\n",
+       "            element.innerHTML = \"Failed!\";\n",
+       "            setTimeout(() => {\n",
+       "                element.innerHTML = originalHTML;\n",
+       "                element.style = originalStyle;\n",
+       "            }, 2000);\n",
+       "        });\n",
+       "    return false;\n",
+       "}\n",
+       "\n",
+       "document.querySelectorAll('.copy-paste-icon').forEach(function(element) {\n",
+       "    const toggleableContent = element.closest('.sk-toggleable__content');\n",
+       "    const paramPrefix = toggleableContent ? toggleableContent.dataset.paramPrefix : '';\n",
+       "    const paramName = element.parentElement.nextElementSibling\n",
+       "        .textContent.trim().split(' ')[0];\n",
+       "    const fullParamName = paramPrefix ? `${paramPrefix}${paramName}` : paramName;\n",
+       "\n",
+       "    element.setAttribute('title', fullParamName);\n",
+       "});\n",
+       "\n",
+       "\n",
+       "/**\n",
+       " * Adapted from Skrub\n",
+       " * https://github.com/skrub-data/skrub/blob/403466d1d5d4dc76a7ef569b3f8228db59a31dc3/skrub/_reporting/_data/templates/report.js#L789\n",
+       " * @returns \"light\" or \"dark\"\n",
+       " */\n",
+       "function detectTheme(element) {\n",
+       "    const body = document.querySelector('body');\n",
+       "\n",
+       "    // Check VSCode theme\n",
+       "    const themeKindAttr = body.getAttribute('data-vscode-theme-kind');\n",
+       "    const themeNameAttr = body.getAttribute('data-vscode-theme-name');\n",
+       "\n",
+       "    if (themeKindAttr && themeNameAttr) {\n",
+       "        const themeKind = themeKindAttr.toLowerCase();\n",
+       "        const themeName = themeNameAttr.toLowerCase();\n",
+       "\n",
+       "        if (themeKind.includes(\"dark\") || themeName.includes(\"dark\")) {\n",
+       "            return \"dark\";\n",
+       "        }\n",
+       "        if (themeKind.includes(\"light\") || themeName.includes(\"light\")) {\n",
+       "            return \"light\";\n",
+       "        }\n",
+       "    }\n",
+       "\n",
+       "    // Check Jupyter theme\n",
+       "    if (body.getAttribute('data-jp-theme-light') === 'false') {\n",
+       "        return 'dark';\n",
+       "    } else if (body.getAttribute('data-jp-theme-light') === 'true') {\n",
+       "        return 'light';\n",
+       "    }\n",
+       "\n",
+       "    // Guess based on a parent element's color\n",
+       "    const color = window.getComputedStyle(element.parentNode, null).getPropertyValue('color');\n",
+       "    const match = color.match(/^rgb\\s*\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)\\s*$/i);\n",
+       "    if (match) {\n",
+       "        const [r, g, b] = [\n",
+       "            parseFloat(match[1]),\n",
+       "            parseFloat(match[2]),\n",
+       "            parseFloat(match[3])\n",
+       "        ];\n",
+       "\n",
+       "        // https://en.wikipedia.org/wiki/HSL_and_HSV#Lightness\n",
+       "        const luma = 0.299 * r + 0.587 * g + 0.114 * b;\n",
+       "\n",
+       "        if (luma > 180) {\n",
+       "            // If the text is very bright we have a dark theme\n",
+       "            return 'dark';\n",
+       "        }\n",
+       "        if (luma < 75) {\n",
+       "            // If the text is very dark we have a light theme\n",
+       "            return 'light';\n",
+       "        }\n",
+       "        // Otherwise fall back to the next heuristic.\n",
+       "    }\n",
+       "\n",
+       "    // Fallback to system preference\n",
+       "    return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';\n",
+       "}\n",
+       "\n",
+       "\n",
+       "function forceTheme(elementId) {\n",
+       "    const estimatorElement = document.querySelector(`#${elementId}`);\n",
+       "    if (estimatorElement === null) {\n",
+       "        console.error(`Element with id ${elementId} not found.`);\n",
+       "    } else {\n",
+       "        const theme = detectTheme(estimatorElement);\n",
+       "        estimatorElement.classList.add(theme);\n",
+       "    }\n",
+       "}\n",
+       "\n",
+       "forceTheme('sk-container-id-4');</script></body>"
+      ],
+      "text/plain": [
+       "Ridge(alpha=2.0)"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Create the linear regression model and fit it on (x_train, y_train)\n",
+    "model = LinearRegression()\n",
+    "model.fit(x_train,y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "8ccc5166-fb20-44a7-95bc-925e5b3c8f4d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "la valeur de a est : [0.9314094]\n",
+      "la valeur de b est  -11.480004226844926\n"
+     ]
+    }
+   ],
+   "source": [
+    "#y=ax+b\n",
+    "print('la valeur de a est :',model.coef_)\n",
+    "print('la valeur de b est ',model.intercept_)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "93239f43-5f1f-4a35-9479-0b9de2752743",
+   "metadata": {},
+   "source": [
+    "### 🧠 Interprétation des Résultats\n",
+    "\n",
+    "Voici ce que les mathématiques racontent sur la réalité de nos écoles :\n",
+    "\n",
+    "Mon modèle a trouvé l'équation suivante :\n",
+    "$$Absents = 0.066 \\times Inscrits + 11.12$$\n",
+    "\n",
+    "**1. Le Coefficient ($a \\approx 0.066$) : Le Taux d'Absentéisme**\n",
+    "* C'est la pente de la droite.\n",
+    "* Cela signifie que **pour chaque nouvel élève inscrit**, le nombre d'absents augmente d'environ **0.066**.\n",
+    "* *En clair :* Pour 100 élèves inscrits de plus, le modèle prévoit environ **6 ou 7** absents supplémentaires ($100 \\times 0.066 = 6.6$) ; l'intercept s'ajoute ensuite à cette part proportionnelle pour obtenir la prédiction totale.\n",
+    "\n",
+    "**2. L'Intercept ($b \\approx 11.12$) : Le Calibrage**\n",
+    "* C'est l'ordonnée à l'origine (le point de départ de la droite).\n",
+    "* Théoriquement, cela voudrait dire qu'une école avec **0 élève** aurait quand même **11 absents**.\n",
+    "* *En réalité :* C'est physiquement impossible, mais c'est un ajustement mathématique nécessaire pour que la ligne droite passe au mieux au milieu du nuage de points."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "84752c5b-601b-4923-9774-6c5d05d0a212",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 221.37234507  183.18455978  497.06952663 ...  379.71194259  384.36898957\n",
+      " 1263.6194605 ]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Predict on x_test and print the predicted values\n",
+    "predict = model.predict(x_test)\n",
+    "print(predict)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "4c4cc377-b7a5-4640-baac-097637abfe12",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHHCAYAAABeLEexAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAiRRJREFUeJzt3XlcVFX/wPHPAIICAoosKoj7vmsZpqlJ4lYZWm6plWW5lLimbWqbZptmaam/R3tKs0xK09RIcTcrU3PLTDFXcAVcQYbz++M+M3JhgBmYYVi+79frvnDOPffec++MzJezGpRSCiGEEEKIUszF2QUQQgghhHA2CYiEEEIIUepJQCSEEEKIUk8CIiGEEEKUehIQCSGEEKLUk4BICCGEEKWeBERCCCGEKPUkIBJCCCFEqScBkdDZsmULr7/+OsnJyc4uiihCVq1axTvvvMPt27edXRQhhHAICYiE2b///kuvXr0oX748vr6+Vh1TvXp1nnjiCfPrTZs2YTAY2LRpk93KZTAYmDp1qt3OV1BPPPEE1atXt9v5pk6disFgsNv57O3333+nX79+1K5dmzJlyji7OIUmIyODxo0b89Zbbzm7KA5h78+xo61btw5vb28uXLhg03EjRozggQcecFCpxD333MPEiROdXQy7kICoGFu8eDEGg8G8lS1blrp16zJq1CgSExNtOtft27fp27cvTzzxBGPGjHFQiXP2448/FqmgpzgzBVimzdPTk4YNG/LKK6+QkpJi07mSkpJ47LHHmDFjBr1793ZQiYumr776ilOnTjFq1CiL++fOnYvBYKBNmzYW9x86dIipU6dy4sQJi8cuXrzYjqUt+bp27Urt2rWZPn261cfEx8ezcOFCXnrpJXPaiRMndP8/XFxcqFixIt26dWPnzp3ZzrFhwwaeeuop6tati6enJzVr1uTpp5/m3LlzdrkvWyUlJTFs2DACAgLw8vKiU6dO/PHHH1Yf//HHH9OgQQM8PDyoWrUqY8eO5fr169nynTt3jmHDhlGjRg3KlStHrVq1GDt2LJcuXdLle/HFF/nkk09ISEgo8L05nRLF1qJFixSgXn/9dfXFF1+oBQsWqCFDhigXFxdVo0YNdf36davPtWfPHvXhhx+qjIwMm8oQFhamhgwZYn5tNBrVzZs3ldFotOk8I0eOVDl9HG/evKlu375t0/kcaciQISosLMxu55syZUqO916Q882bN0998cUXat68eeqRRx5RgAoPD7fpPY6Li1P/93//Z7eyFSfNmjVTw4YNy3F/27ZtVfXq1RWgjh49mm3/8uXLFaDi4uKy7WvUqJHq0KGDHUtrO3t/jgvD3Llzlaenp0pJSbEq/+jRo1XdunV1afHx8QpQ/fv3V1988YVavHixeumll5Sfn5/y8PBQf/75py5/q1atVI0aNdTEiRPVggUL1OTJk1X58uVVUFCQOnfunN3uzRpGo1G1bdtWeXl5qalTp6qPP/5YNWzYUJUvX179/fffeR4/ceJEBag+ffqoefPmqeeff165ubmpLl266PJdvXpVhYWFqUqVKqnXXntNLViwQI0aNUqVKVNGNW/eXPf73Wg0quDgYPXqq6/a/X4LmwRExZgpIPrtt9906WPHjlWAWrp0aY7HXrt2zS5lyBoQ5VduAVFRU1wCogsXLujSo6KiFKB27NiR47G2BNEl2R9//KEA9fPPP1vcf/z4cQWomJgYFRAQoKZOnZotjwRE9peYmKhcXV2tCtLT0tJUpUqV1CuvvKJLNwVE7777ri597dq1ClDDhw/XpW/evDnbH3ibN29WgHr55ZfzeSf58/XXXytALV++3Jx2/vx55efnp/r375/rsWfPnlVubm5q0KBBuvQ5c+YoQK1atcqctmTJEgWo1atX6/K+9tprClB//PGHLn3UqFEqLCzM5j+oixppMiuB7r//fkCrLgatr4C3tzfHjh2je/fulC9fnoEDBwJaP4l
Zs2bRqFEjypYtS1BQEM8++yxXrlzRnVMpxZtvvklISAienp506tSJgwcPZrt2Tn2Idu3aRffu3alQoQJeXl40bdqU2bNnm8v3ySefAOiqsk0s9SHas2cP3bp1w8fHB29vbzp37swvv/yiy2NqUty+fTtjx441VzE/8sgjVvdD+P7772ncuDFly5alcePGfPfddxbzWfscrbVo0SLuv/9+AgMD8fDwoGHDhsybNy9f5zLJ+rno2LEjjRs3Zvfu3dx33314enqamxZSU1OZMmUKtWvXxsPDg9DQUCZOnEhqaqrunLGxsbRr1w4/Pz+8vb2pV6+ernnClnMZDAZGjRplfuYeHh40atSIdevWZbuXM2fOMHToUKpUqYKHhwc1atRg+PDhpKWlmfMkJSURHR1NaGgoHh4e1K5dm3feeYeMjIw8n9X333+Pu7s79913n8X9S5YsoUKFCvTo0YM+ffqwZMkS3f7Fixfz6KOPAtCpUyfzZ3rTpk1Ur16dgwcPsnnzZnN6x44dbSq3qdnnvffeY/78+dSqVQsPDw/uuusufvvtN4v3Y8/PcfXq1enZsyfbtm3j7rvvpmzZstSsWZP//ve/2c6ZlJTEmDFjqF69Oh4eHoSEhDB48GAuXrzItWvX8PLyYvTo0dmOO336NK6urromssDAQJo2bcrKlSstlj+zbdu2cfHiRSIiIvLMC9C+fXsAjh07pku/7777cHFxyZZWsWJFDh8+bNW57eXbb78lKCiIqKgoc1pAQACPPfYYK1euzPZ/KrOdO3eSnp5Ov379dOmm18uWLTOnmZrWg4KCdHkrV64MQLly5XTpDzzwAP/++y979+61/aaKEDdnF0DYn+k/tL+/vzktPT2dyMhI2rVrx3vvvYenpycAzz77LIsXL+bJJ5/khRdeID4+no8//pg9e/awfft2cyfa1157jTfffJPu3bvTvXt3/vjjD7p06aL7AspJbGwsPXv2pHLlyowePZrg4GAOHz7M6tWrGT16NM8++yxnz54lNjaWL774Is/zHTx4kPbt2+Pj48PEiRMpU6YMn332GR07dmTz5s3Z+nQ8//zzVKhQgSlTpnDixAlmzZrFqFGj+Prrr3O9zk8//UTv3r1p2LAh06dP59KlSzz55JOEhIRky2vtc7TWvHnzaNSoEQ899BBubm788MMPjBgxgoyMDEaOHGnTuUwsfS4uXbpEt27d6NevH48//jhBQUFkZGTw0EMPsW3bNoYNG0aDBg3Yv38/H374IX///Tfff/89oL0PPXv2pGnTprz++ut4eHjwzz//sH37dvP5rT2XybZt24iJiWHEiBGUL1+ejz76iN69e3Py5Elzuc+ePcvdd99t7ktRv359zpw5w7fffsuNGzdwd3fnxo0bdOjQgTNnzvDss89SrVo1duzYweTJkzl37hyzZs3K9Vnt2LGDxo0b5/i+LVmyhKioKNzd3enfvz/z5s3jt99+46677gK0L8wXXniBjz76iJdeeokGDRoA0KBBA2bNmsXzzz+Pt7c3L7/8MnDni8fWci9dupSrV6/y7LPPYjAYmDlzJlFRURw/ftxcdkd9jv/55x/69OnD0KFDGTJkCP/5z3944oknaNWqFY0aNQLg2rVrtG/fnsOHD/PUU0/RsmVLLl68yKpVqzh9+jTNmzfnkUce4euvv+aDDz7A1dXVfP6vvvoKpZT5jzeTVq1aZfvc5PQeGgwGWrRokWdewNzXq0KFCnnmvXbtGteuXaNSpUp55r1x4wY3btzIM5+rq2ue196zZw8tW7bMFqDdfffdzJ8/n7///psmTZpYPNYULGUNZkzfBbt37zanmYLA0aNH8/777xMSEsKff/7JW2+9Ra9evahfv77uHK1atQJg+/btVj/vIsnZVVQi/0xNZj///LO6cOGCOnXqlFq2bJny9/dX5cqVU6dPn1ZKaVXjgJo0aZLu+K1btypALVmyRJe+bt06Xfr58+eVu7u76tGjh65K9KWXXlK
ArsksLi5O10yQnp6uatSoocLCwtSVK1d018l8rtyazAA1ZcoU8+tevXopd3d3dezYMXPa2bNnVfny5dV9992X7flERETorjVmzBjl6uqqkpKSLF7PpHnz5qpy5cq6fD/99JMCdE0N1j7HnFhqMrtx40a2fJGRkapmzZq5nivz+Y4cOaIuXLig4uPj1WeffaY8PDxUUFCQuVmsQ4cOClCffvqp7vgvvvhCubi4qK1bt+rSP/30UwWo7du3K6WU+vDDDy02zeXnXEpp77O7u7v6559/zGn79u1TgJozZ445bfDgwcrFxSVbU7FSdz5Tb7zxhvLy8srWr2LSpEnK1dVVnTx5MscyK6VUSEiI6t27t8V9v//+uwJUbGys+ZohISFq9OjRunz5aTKzttymZh9/f391+fJlc76VK1cqQP3www/mNEd8jsPCwhSgtmzZYk47f/688vDwUOPGjTOnmZpYYmJist2r6b1av369AtTatWt1+5s2bWrxGb399tsKUImJidn2Zfb4448rf3//bOmmZzdt2jR14cIFlZCQoLZu3aruuuuubM1ROXnjjTcUoDZs2JBnXtP/x7w2a5ovvby81FNPPZUtfc2aNQpQ69aty/HY3bt3K0C98cYbunTT++vt7a1LX7hwofLz89OVcciQITn253R3d8/W3FjcSJNZCRAREUFAQAChoaH069cPb29vvvvuO6pWrarLN3z4cN3r5cuX4+vrywMPPMDFixfNW6tWrfD29iYuLg6An3/+mbS0NJ5//nldU1Z0dHSeZduzZw/x8fFER0fj5+en25efoeZGo5GffvqJXr16UbNmTXN65cqVGTBgANu2bcs2kmrYsGG6a7Vv3x6j0ci///6b43XOnTvH3r17GTJkiG4KggceeICGDRvq8lr7HG2R+a+45ORkLl68SIcOHTh+/LjVc0TVq1ePgIAAatSowbPPPkvt2rVZs2aN+S9CAA8PD5588sls99OgQQPq16+vux9Tk5vpfkzv58qVK3NshrL2XCYRERHUqlXL/Lpp06b4+Phw/PhxQKtx+v7773nwwQdp3bp1tuuZ3ufly5fTvn17KlSooLtuREQERqORLVu25PrsLl26lONf60uWLCEoKIhOnTqZr9m3b1+WLVuG0WjM9bx5sbXcffv21ZXT1Oxjel6O/Bw3bNjQfD3Qmm7q1atnvjbAihUraNasGY888ki2ezW9VxEREVSpUkXX7HjgwAH+/PNPHn/88WzHme734sWLlh6hWW7vIcCUKVMICAggODjYXIv1/vvv06dPn1zPu2XLFqZNm8Zjjz1m/hznZvDgwcTGxua5ZW12teTmzZt4eHhkSy9btqx5f05atmxJmzZteOedd1i0aBEnTpxg7dq1PPvss5QpUybbsVWrVuXuu+9m1qxZfPfdd4wdO5YlS5YwadIki+c3fWaLM2kyKwE++eQT6tati5ubG0FBQdSrVy9blaqbm1u2KvKjR4+SnJxMYGCgxfOeP38ewBw41KlTR7c/ICAgzypeUzNN48aNrb+hXFy4cIEbN25Qr169bPsaNGhARkYGp06dMlfZA1SrVk2Xz1Tm3Pr35HTPoAUamYe5WvscbbF9+3amTJnCzp07s1W3JycnWzVP1IoVK/Dx8aFMmTKEhIToAg2TqlWr4u7urks7evQohw8fJiAgwOJ5TffTt29fFi5cyNNPP82kSZPo3LkzUVFR9OnTx/z5s/ZcJlnfK9DeL9N7deHCBVJSUvL8PB09epQ///zT6utaopTKlmY0Glm2bBmdOnUy98UCaNOmDe+//z4bNmygS5cueZ7bXuXO67PtyM9xXu8VaP//85quwcXFhYEDBzJv3jxu3LiBp6cnS5YsoWzZsuZ+WJmZ3hdr/qCy9B6aDBs2jEcffZRbt26xceNGPvroozwD2r/++otHHnmExo0bs3DhwjyvD1CzZk3dH28FUa5cOYv9hG7dumXen5sVK1bQt29fnnrqKUBrphs7diy
bN2/myJEj5nzbt2+nZ8+e/PLLL+Y/PHr16oWPjw/Tpk3jqaeeyhZQK6WK9Hxq1pCAqAS4++67Lf61nJmHh0e2ICkjI4PAwMAc/zLJ6ZdycZO5X0Jmuf2ytIW9n+OxY8fo3Lkz9evX54MPPiA0NBR3d3d+/PFHPvzwQ6s6BYPWDyCvPg6WfoFmZGTQpEkTPvjgA4vHhIaGmo/dsmULcXFxrFmzhnXr1vH1119z//3389NPP+Hq6mr1uUzs9V5lZGTwwAMP5DhhXN26dXM93t/f32LAvHHjRs6dO8eyZct0nVBNlixZUqCAyNZy2/Ozbevn2J7XHjx4MO+++y7ff/89/fv3Z+nSpfTs2dNi4G96X/L6bOf0HprUqVPH3OG6Z8+euLq6MmnSJDp16mTx9+mpU6fo0qULvr6+/Pjjj5QvX96qezP1N8qLq6trnr8rKleubHH+I1NalSpVcj2+atWqbNu2jaNHj5KQkECdOnUIDg6mSpUqus/WZ599RlBQULbn8NBDDzF16lR27NiRLSBKSkqyqk9VUSYBUSlWq1Ytfv75Z+69995c/7IICwsDtL8gM/+lc+HChTxHUZlqJQ4cOJDraA9r/7IICAjA09NT99eMyV9//YWLi0u2L9n8yHzPWWW9trXP0Vo//PADqamprFq1SvdXeH6a3vKjVq1a7Nu3j86dO+f5vri4uNC5c2c6d+7MBx98wNtvv83LL79MXFycufnL2nNZIyAgAB8fHw4cOJDnPVy7ds3qEUZZ1a9fX1cDZLJkyRICAwPNoyIzi4mJ4bvvvuPTTz+lXLlyud5vTvsKWu6snPk5Np0zr/cKtBrkFi1asGTJEkJCQjh58iRz5syxmDc+Pp5KlSrlGTzUr1+fJUuWWF2j+vLLL7NgwQJeeeWVbCMbL126RJcuXUhNTWXDhg3m0VbWeO+995g2bVqe+cLCwixO4plZ8+bN2bp1KxkZGbo/cHft2oWnp2eegb5JnTp1zLWGhw4d4ty5c7oVBxITEy3WlpmW7klPT9elnzlzhrS0NPPggeJK+hCVYo899hhGo5E33ngj27709HSSkpIArY2/TJkyzJkzR/fXX14jdUBrt65RowazZs0yn88k87m8vLwAsuXJytXVlS5durBy5UrdL4/ExESWLl1Ku3bt8PHxybNcealcuTLNmzfn888/1/XZiY2N5dChQ7q81j5Ha5n+8s78fJKTk1m0aJFN58mvxx57jDNnzrBgwYJs+27evGme1fby5cvZ9jdv3hy4M6LF2nNZy8XFhV69evHDDz/w+++/Z9tvemaPPfYYO3fuZP369dnyJCUlZfuFnlV4eDgHDhzQNU/cvHmTmJgYevbsSZ8+fbJto0aN4urVq6xatQrI/TPt5eVlMb2g5c7KmZ9jgN69e7Nv3z6Lw/yz1iQNGjSIn376iVmzZuHv70+3bt0snnP37t2Eh4fnee3w8HCUUrrRU7nx8/Pj2WefZf369brh49evX6d79+6cOXOGH3/80WLzY27s2YeoT58+JCYmEhMTY067ePEiy5cv58EHH9T1Lzp27Fi2KQSyysjIYOLEiXh6evLcc8+Z0+vWrUtiYmK26VO++uorgGwjyUzPuG3btnneQ1EmNUSlWIcOHXj22WeZPn06e/fupUuXLpQpU4ajR4+yfPlyZs+eTZ8+fQgICGD8+PFMnz6dnj170r17d/bs2cPatWvzrCJ1cXFh3rx5PPjggzRv3pwnn3ySypUr89dff3Hw4EHzL37TsM0XXniByMhIXF1ds82XYfLmm2+a578ZMWIEbm5ufPbZZ6SmpjJz5ky7PZ/p06fTo0cP2rVrx1NPPcXly5eZM2cOjRo10lWBW/scrdWlSxfc3d158MEHefbZZ7l27RoLFiwgMDCwUJYLGDRoEN988w3PPfcccXFx3HvvvRiNRv766y+++eYb1q9fT+vWrXn99dfZsmULPXr0ICwsjPPnzzN37lxCQkJo166dTeeyxdtvv81
PP/1Ehw4dzEP5z507x/Lly9m2bRt+fn5MmDCBVatW0bNnT/NQ8OvXr7N//36+/fZbTpw4ketn9+GHH+aNN95g8+bN5iawVatWcfXqVR566CGLx9xzzz0EBASwZMkS+vbtS/PmzXF1deWdd94hOTkZDw8P89xSrVq1Yt68ebz55pvUrl2bwMBA7r///gKX2xJnfY4BJkyYwLfffsujjz7KU089RatWrbh8+TKrVq3i008/pVmzZua8AwYMYOLEiXz33XcMHz7c4pQH58+f588//7Rq6ol27drh7+/Pzz//bFXnZ4DRo0cza9YsZsyYYW4SHThwIL/++itPPfUUhw8f1s095O3tTa9evXI9pz37EPXp04d77rmHJ598kkOHDlGpUiXmzp2L0WjMVgvVuXNnAN0fjqNHj+bWrVs0b96c27dvs3TpUn799Vc+//xzXW30qFGjWLRoEQ8++CDPP/88YWFhbN68ma+++ooHHngg29QmsbGxVKtWrXgPuQcZdl+c5TRTdVZDhgxRXl5eOe6fP3++atWqlSpXrpwqX768atKkiZo4caI6e/asOY/RaFTTpk1TlStXVuXKlVMdO3ZUBw4cyDZTddZh9ybbtm1TDzzwgCpfvrzy8vJSTZs21Q2lTk9PV88//7wKCAhQBoNBNwydLMPuldJmEo6MjFTe3t7K09NTderUKdsMzDk9n5zKaMmKFStUgwYNlIeHh2rYsKGKiYnJcYZfa56jJZaG3a9atUo1bdpUlS1bVlWvXl2988476j//+Y8CVHx8vFXny204vFLasPtGjRpZ3JeWlqbeeecd1ahRI+Xh4aEqVKigWrVqpaZNm6aSk5OVUkpt2LBBPfzww6pKlSrK3d1dValSRfXv3z/bkHFrzqWU9j6PHDkyW1kszYb+77//qsGDB6uAgADl4eGhatasqUaOHKlSU1PNea5evaomT56sateurdzd3VWlSpVU27Zt1XvvvafS0tJyfTZKacO+hw4dan794IMPqrJly+Y6m/cTTzyhypQpoy5evKiUUmrBggWqZs2aytXVVfeZS0hIUD169FDly5dXgG54uTXlzmm2ZdNzzPr/xd6f47CwMNWjR49sx3bo0CHbUPlLly6pUaNGqapVqyp3d3cVEhKihgwZYn5GmXXv3j3X2dTnzZtn09IdL7zwgqpdu7YuLbdnp5T2Hrq6upqnfzBNMWBpc8ZM35cvX1ZDhw5V/v7+ytPTU3Xo0MHid0BYWFi28i1atEg1a9ZMeXl5qfLly6vOnTurjRs3WrzOX3/9pfr06aNCQ0NVmTJlVFhYmBo/fny2z7/RaFSVK1fONiN4cWRQyk49S4UQogT54osvGDlyJCdPnsw2ZYRwjEceeYT9+/fzzz//WNzfokULOnbsyIcffmjV+Y4fP079+vVZu3atucZE2Nf333/PgAEDOHbsmE19q4oi6UMkhBAWDBw4kGrVqlnsQC3s79y5c6xZs4ZBgwZZ3L9u3TqOHj3K5MmTrT5nzZo1GTp0KDNmzLBXMUUW77zzDqNGjSr2wRCA1BAJIYRwmvj4eLZv387ChQv57bffOHbsGMHBwc4uliiFpIZICCGE02zevJlBgwYRHx/P559/LsGQcBqpIRJCCCFEqSc1REIIIYQo9SQgEkIIIUSpJxMzWiEjI4OzZ89Svnz5Yr94nRBCCFFaKKW4evUqVapUybaeZ1YSEFnh7NmzdlkfSwghhBCF79SpU4SEhOSaRwIiK5hWNT516pRd1skSQgghhOOlpKQQGhpq/h7PjQREVjA1k/n4+EhAJIQQQhQz1nR3kU7VQgghhCj1nB4QnTlzhscffxx/f3/KlStHkyZN+P333837lVK89tprVK5cmXLlyhEREcHRo0d157h8+TIDBw7Ex8cHPz8/hg4dqlvFGeDPP/+kffv2lC1bltDQULuuii6EEEKI4s2pAdGVK1e49957KVOmDGvXruXQoUO8//77VKhQwZxn5syZfPTRR3z
66afs2rULLy8vIiMjuXXrljnPwIEDOXjwILGxsaxevZotW7YwbNgw8/6UlBS6dOlCWFgYu3fv5t1332Xq1KnMnz+/UO9XCCGEEEWTU2eqnjRpEtu3b2fr1q0W9yulqFKlCuPGjWP8+PEAJCcnExQUxOLFi+nXrx+HDx+mYcOG/Pbbb7Ru3RrQFgHs3r07p0+fpkqVKsybN4+XX36ZhIQE3N3dzdf+/vvv+euvv/IsZ0pKCr6+viQnJ0sfIiGEEKKYsOX726k1RKtWraJ169Y8+uijBAYG0qJFCxYsWGDeHx8fT0JCAhEREeY0X19f2rRpw86dOwHYuXMnfn5+5mAIICIiAhcXF3bt2mXOc99995mDIYDIyEiOHDnClStXspUrNTWVlJQU3SaEEEKIksupAdHx48eZN28ederUYf369QwfPpwXXniBzz//HICEhAQAgoKCdMcFBQWZ9yUkJBAYGKjb7+bmRsWKFXV5LJ0j8zUymz59Or6+vuZN5iASQgghSjanBkQZGRm0bNmSt99+mxYtWjBs2DCeeeYZPv30U2cWi8mTJ5OcnGzeTp065dTyCCGEEMKxnBoQVa5cmYYNG+rSGjRowMmTJwEIDg4GIDExUZcnMTHRvC84OJjz58/r9qenp3P58mVdHkvnyHyNzDw8PMxzDsncQ0IIIUTJ59SA6N577+XIkSO6tL///puwsDAAatSoQXBwMBs2bDDvT0lJYdeuXYSHhwMQHh5OUlISu3fvNufZuHEjGRkZtGnTxpxny5Yt3L5925wnNjaWevXq6Ua0CSGEEKJ0cmpANGbMGH755Rfefvtt/vnnH5YuXcr8+fMZOXIkoM0sGR0dzZtvvsmqVavYv38/gwcPpkqVKvTq1QvQapS6du3KM888w6+//sr27dsZNWoU/fr1o0qVKgAMGDAAd3d3hg4dysGDB/n666+ZPXs2Y8eOddatCyGEEKWW0QibNsFXX2k/jUZnlwhQTvbDDz+oxo0bKw8PD1W/fn01f/583f6MjAz16quvqqCgIOXh4aE6d+6sjhw5ostz6dIl1b9/f+Xt7a18fHzUk08+qa5evarLs2/fPtWuXTvl4eGhqlatqmbMmGF1GZOTkxWgkpOT83+jQgghhFArVigVEqIU3NkqVVIqOlqpuDil0tPtdy1bvr+dOg9RcSHzEAkhhBAFFxMDffpoYVBOQkJg9myIiir49YrNPERCCCGEKB2MRhg9OvdgCOD0aS1oiokpnHKZSEAkhBBCCIfbulULdqyhFERHF27fIgmIhBBCCOFw587Zlv/UKS2IKiwSEAkhhBDC4SpXtv2YM2fsX46cSEAkhBBCCIdr3x4qVbLtmAsXHFMWSyQgEkIIIYTDubrCfffZdkxAgGPKYokEREIIIYQoFEeP2pa/alXHlMMSCYiEEEII4XBGIxw4YH3+SpW0ZrbCIgGREEIIIRxuw4a85yDK7OOPtWa2wiIBkRBCCCEcbtEi2/IHBTmmHDmRgEgIIYQQDvfLL7blt3XeooKSgEgIIYQQDufublv+/MxbVBASEAkhhBDC4cLCrM/r7l64HapBAiIhhBBCOJjRaFuTWd++hduhGiQgEkIIIYSDbdoEV69an3/QIIcVJUcSEAkhhBDCoTZutC3/xYuOKUduJCASQgghhEOdPGlb/sLuUA0SEAkhhBDCwapVsz5vSEjhd6gGCYiEEEII4WAdO1qf95lnCr9DNUhAJIQQQggHc8kl2hjNLBQG3uIlAOrUKaRCZSEBkRBCCCEc6vz57Gke3EJhYBZjABjIEsA5/YdAAiIhhBBCOFjWIKcL67lFOV3afWwhIMA5/YdAAiIhhBBCOFj79uDvD6DYxr2sp6t53yoexIDiJGHMneuc/kMAbs65rBBCCCFKk9rGv7hIA11aO7aynXYAeHvDI484o2QaqSESQgghhEOd7TeWX5LuBEMXqEQZ0szBEMC1a7B1qzNKp5EaIiGEEEI4xpUrULEioZmSnuQ
/LOZJi9nPnSucYlkiAZEQQggh7O/zz+GJJ3RJFbnEFSrmeIizRpiBNJkJIYQQwp7S0yE4WBcMZTz/AqEhiiSD5WDIYIDQUOeNMAMJiIQQQghhLzt3QpkykJh4J+3QIVw+ms3s2dpLg0F/iOn1rFnOG2EGEhAJIYQQwh5694a2be+8btMGMjKggdaZOioKvv0WqlbVHxYSoqVHRRViWS2QPkRCCCGEyL9Tp7Kv3vrjj9CtW7asUVHw8MPaaLJz57Q+Q+3bO7dmyEQCIiGEEELkz/Tp8NJL+rQbN6BcOcv50YIfWxZ7LSwSEAkhhBDCNtevazMpZvbuuzB+vHPKYwcSEAkhhBDCeitXQq9e+rQzZ6BKFacUx16kU7UQQggh8paRAc2b64Ohfv1AqWIfDIHUEAkhhBAiLwcOQJMm+rRff4W77nJOeRxAaoiEEEIIkbPhw/XBUPXq2uSLJSgYAqkhEkIIIYQlFy9CQIA+belS6N/fOeVxMKkhEkIIIYTe/PnZg6GkpBIbDIEEREIIIYQwSUsDHx949tk7aZMnax2nfX2dV65CIE1mQgghhIBNm6BTJ33a0aNQu7ZTilPYpIZICCGEKM2Ugq5d9cHQ/fdrw+xLSTAEUkMkhBBClF7Hj0OtWvq0n3+Gzp2dUx4nkhoiIYQQojR69VV9MFS2LNy6VSqDIZAaIiGEEKJ0uXpV6zid2ccfw8iRzilPESEBkRBCCFFafPMN9O2rT0tIgKAg55SnCJEmMyGEEKKkMxq1DtKZg6GhQ7UO1RIMAU4OiKZOnYrBYNBt9evXN++/desWI0eOxN/fH29vb3r37k1iYqLuHCdPnqRHjx54enoSGBjIhAkTSE9P1+XZtGkTLVu2xMPDg9q1a7N48eLCuD0hhBDC+f74A9zc4NixO2l79sDChc4rUxHk9BqiRo0ace7cOfO2bds2874xY8bwww8/sHz5cjZv3szZs2eJiooy7zcajfTo0YO0tDR27NjB559/zuLFi3nttdfMeeLj4+nRowedOnVi7969REdH8/TTT7N+/fpCvU8hhBCi0A0ZAq1a3XndqJFWW9S8udOKVFQZlFLKWRefOnUq33//PXv37s22Lzk5mYCAAJYuXUqfPn0A+Ouvv2jQoAE7d+7knnvuYe3atfTs2ZOzZ88S9L8qv08//ZQXX3yRCxcu4O7uzosvvsiaNWs4cOCA+dz9+vUjKSmJdevWWVXOlJQUfH19SU5OxidrRzQhhBCiqElIgMqV9WkrVkCmSoXSwJbvb6fXEB09epQqVapQs2ZNBg4cyMmTJwHYvXs3t2/fJiIiwpy3fv36VKtWjZ07dwKwc+dOmjRpYg6GACIjI0lJSeHgwYPmPJnPYcpjOoclqamppKSk6DYhhBCiWJg9O3swdPVqqQuGbOXUgKhNmzYsXryYdevWMW/ePOLj42nfvj1Xr14lISEBd3d3/Pz8dMcEBQWRkJAAQEJCgi4YMu037cstT0pKCjdv3rRYrunTp+Pr62veQkND7XG7QgghhOPcugWurhAdfSdt2jSt47S3t9OKVVw4ddh9t27dzP9u2rQpbdq0ISwsjG+++YZy5co5rVyTJ09m7Nix5tcpKSkSFAkhhCi6fvoJIiP1aSdOQFiYU4pTHDm9ySwzPz8/6tatyz///ENwcDBpaWkkJSXp8iQmJhIcHAxAcHBwtlFnptd55fHx8ckx6PLw8MDHx0e3CSGEEEWOUtCunT4Y6tlTS5dgyCZFKiC6du0ax44do3LlyrRq1YoyZcqwYcMG8/4jR45w8uRJwsPDAQgPD2f//v2cP3/enCc2NhYfHx8aNmxozpP5HKY8pnMIIYQQxdLff4OLC2zffidtyxb44QfnlakYc2pANH78eDZv3syJEyfYsWMHjzzyCK6urvTv3x9fX1+GDh3K2LFjiYuLY/fu3Tz55JOEh4dzzz33ANClSxcaNmzIoEGD2LdvH+vXr+eVV15h5MiReHh4APDcc89x/Ph
xJk6cyF9//cXcuXP55ptvGDNmjDNvXQghhMi/8eOhXr07r/39IS0N2rd3XpmKOaf2ITp9+jT9+/fn0qVLBAQE0K5dO3755RcCAgIA+PDDD3FxcaF3796kpqYSGRnJ3Llzzce7urqyevVqhg8fTnh4OF5eXgwZMoTXX3/dnKdGjRqsWbOGMWPGMHv2bEJCQli4cCGRWdtahRBCiKLuyhWoWFGftnChNuu0KBCnzkNUXMg8REIIIZzuiy9g8GB92sWLWu2QsKhYzUMkhBBCiFykp0OVKvpg6PnntY7TEgzZjax2L4QQQhRVO3dC27b6tIMH4X8Dh4T9SA2REEIIURT16aMPhu66CzIyJBhyEKkhEkIIIYqSU6egWjV92urV0KOHc8pTSkgNkRBCCFFUzJiRPRi6cUOCoUIgNURCCCGEs924AV5e+rSZM2HCBOeUpxSSgEgIIYRwph9+gIce0qedPg1VqzqnPKWUNJkJIYQQzqAUtGihD4b69tXSJRgqdFJDJIQQQhS2AwegSRN92q5dcPfdzimPkBoiIYQQolCNHKkPhkJDtckXJRhyKqkhEkIIIQrDxYvwv7U6zZYsgQEDnFMeoSM1REIIIYSjLViQPRi6ckWCoSJEAiIhhBDCUdLSwMcHhg27k/bii1rHaT8/pxVLZCdNZkIIIYQjbN4MHTvq0/7+G+rUcUpxRO6khkgIIYSwJ6WgWzd9MNSxo7YOmQRDRZbUEAkhhBD2Eh8PNWvq02JjISLCOeURVpMaIiGEEMIepkzRB0Pu7nDrlgRDxYTUEAkhhBAFcfWq1nE6szlzYNQo55RH5IsEREIIIUR+ffstPPqoPi0hAYKCnFMekW/SZCaEEELYKiMD6tbVB0NPPaV1qJZgqFiSGiIhhBDCFnv2QMuW2dOaN3dKcYR9SA2REEIIYa0nntAHQ/Xrg9EowVAJIDVEQgghRF4SEqByZX3at99C797OKY+wO6khEkIIIXLz0UfZg6GrVyUYKmEkIBJCCCEsuXUL3Nxg9Og7aVOnah2nvb2dVizhGNJkJoQQQmT1008QGalPi4+H6tWdUhzheFJDJIQQQpgoBffdpw+GevTQ0iUYKtGkhkgIIYQAbSX6evX0aVu2QPv2zimPKFRSQySEEEJMnKgPhipUgLQ0CYZKEakhEkIIUewZjbB1K5w7pw0Ia98eXF2tODApSQt+Mlu4EIYOdUQxRREmAZEQQohiLSZGGwh2+vSdtJAQePJJrR/0tWvQrh08/7y2AL3Zl1/CoEH6k128CP7+hVJuUbQYlFLK2YUo6lJSUvD19SU5ORmfrCsaCyGEcAqjEd54A6ZNs/6YTp1g3ep03OtWhzNn7uwYORI+/tjuZRTOZcv3t9QQCSGEKHZiYuCZZ+DyZduOuxH3C+5e4frEAwegUSP7FU4US9KpWgghRLESE6NNEm1rMPQ1j/ELd4KhU8GttVXrJRgSSA2REEKIYsRohBdesO2YqpzmNKG6tJ78wLoLPblxO0u/IlFqSQ2REEKIYmPgQH3Xn7xM5J1swZAn11lDT4xGmDvXzgUUxZbUEAkhhCgWJk6Er7+2Lm85bnADL/3xvMO7TNSlHTtmr9KJ4k4CIiGEEEVeWhp88IF1eXuwmtU8qEsL4RRnCMmW99o1e5ROlATSZCaEEKLImztX6z+UO8VvtNYFQ1/zGAaUxWAIIDbWmvOK0kACIiGEEEXe0aO572/IQRQutGa3Oa0Nv9CP3NvYzpzRZrgWQgIiIYQQRd7x4znvm8MoDtLY/PoUIbhxm19pY9W5z50raOlESSB9iIQQQhRpRiPExWVP9+ciFwnQpT3OFyzhcZvOX7lyQUonSgqpIRJCCFGkbdwIqan6tKEszBYM+XHF5mAoNFQWtBcaqSESQghRaPKzKv0XX9z5txu3OU8gFUgyp81kAi8yM1/l6dcv7+uL0kFqiIQQQhSKmBioXl1bYHXAAO1n9epaem6uXtV+tmMrt3HXBUN1OZLvYAh
g2TIZZSY0EhAJIYRwuOXLtfXHTp/Wp585A3365B4UtW0Lq+nBVu4zp22iAwYyOErdApXr1CkZZSY0RSYgmjFjBgaDgejoaHParVu3GDlyJP7+/nh7e9O7d28SExN1x508eZIePXrg6elJYGAgEyZMID09XZdn06ZNtGzZEg8PD2rXrs3ixYsL4Y6EEEIAfPst9O9veZ9S2s/o6Bxqak6cYMJEAz340Zz0AD/RiU2AwS7lk1FmAopIQPTbb7/x2Wef0bRpU136mDFj+OGHH1i+fDmbN2/m7NmzREVFmfcbjUZ69OhBWloaO3bs4PPPP2fx4sW89tpr5jzx8fH06NGDTp06sXfvXqKjo3n66adZv359od2fEEKUVjEx8OijuTdLKZVDTc20aVCjhvllGmXw4BY/84BdyyijzAQAysmuXr2q6tSpo2JjY1WHDh3U6NGjlVJKJSUlqTJlyqjly5eb8x4+fFgBaufOnUoppX788Ufl4uKiEhISzHnmzZunfHx8VGpqqlJKqYkTJ6pGjRrprtm3b18VGRlpdRmTk5MVoJKTk/N7m0IIUeqkpioVEKCUFvLkvS1d+r8DU1Ky7RzFR1afx9rNYFAqNFSp9HSnPibhQLZ8fzu9hmjkyJH06NGDiIgIXfru3bu5ffu2Lr1+/fpUq1aNnTt3ArBz506aNGlCUFCQOU9kZCQpKSkcPHjQnCfruSMjI83nsCQ1NZWUlBTdJoQQwnoxMVClCly4YP0xlSujta/5+OjSP3k1gY953q7lM/yvtW3WLBllJjRODYiWLVvGH3/8wfTp07PtS0hIwN3dHT8/P116UFAQCQkJ5jyZgyHTftO+3PKkpKRw8+ZNi+WaPn06vr6+5i00NDRf9yeEEKVRTIzWgfrSJeuP8fPJoMNz9bX2NZMnngCl+Hl/UI7HWWPwYAjJspRZSIgWe2XqhSFKOafNQ3Tq1ClGjx5NbGwsZcuWdVYxLJo8eTJjx441v05JSZGgSAghrGA0wrBhth3TnD3sSWkJmSvj//gDWrQAwMsr/+VxcYEFC7RaIFvnPxKli9NqiHbv3s358+dp2bIlbm5uuLm5sXnzZj766CPc3NwICgoiLS2NpKQk3XGJiYkEBwcDEBwcnG3Umel1Xnl8fHwoV66cxbJ5eHjg4+Oj24QQQuRt0ybbaob+j6fYQ8s7CfXqaVHV/4IhgGbN8l+ecePA3V0Lfjp21Ea7dewowZDIzmkBUefOndm/fz979+41b61bt2bgwIHmf5cpU4YNGzaYjzly5AgnT54kPDwcgPDwcPbv38/58+fNeWJjY/Hx8aFhw4bmPJnPYcpjOocQQgj7yfLrNkeBJKIw8BSL7iR+8w389ZdWrZNJlSq2l8PFBSZMgJn5n7NRlDJOazIrX748jRs31qV5eXnh7+9vTh86dChjx46lYsWK+Pj48PzzzxMeHs4999wDQJcuXWjYsCGDBg1i5syZJCQk8MorrzBy5Eg8PDwAeO655/j444+ZOHEiTz31FBs3buSbb75hzZo1hXvDQghRwsXEaJ2U8zKKOczhBX1iSgqUL28xf9WqtpXjiSfgs8+0miEhrFWk1zL78MMPcXFxoXfv3qSmphIZGcncuXPN+11dXVm9ejXDhw8nPDwcLy8vhgwZwuuvv27OU6NGDdasWcOYMWOYPXs2ISEhLFy4kMjISGfckhBClEimjtS5cSeVa3hThjuT507jNTrETaOj5VgI0Pr7hIRkn+XaEoNBq6WSJjFhK4NSpnlCRU5SUlLw9fUlOTlZ+hMJIUQWRiMEBeXed6gzP2ebULEGxzGG1iA+Pu8AJiZGW+LD2m+suDitr5Ao3Wz5/nb6PERCCCGKt9w7Uis20UEXDK2hOwYUJ6hh9TxAUVHaMPmKFa0rkyzHIWxVpJvMhBBCFH2bNllOr83RbIuv3sdmtnIfrq5aH2pb5gGKigJfX8gy165FshyHsJXUEAkhhCiQjIzsaTN4URcMJeGLO6nmFev
ffjt/kyJ27Kj1JzLksK6rwQChoVq/IyFsIQGREEKIAvH3v/NvX5JQGHiRO+Pdn2E+FUjiNneGfUVH5+9arq4we7b276xBkSzHIQpCAiIhhBAFcvKk9nMAS0iigm5fJS6wkGd0aabJEvPL1J8o63B8WY5DFISMMrOCjDITQgjLjEaoEmTk90vVCeXOuPhPGMEoPsmW/8EHYdUq+11bluMQubHl+1s6VQshhMi3vfN/JfFSG11aY/ZzkMbZ8vbsab9gCO4sxyGEPUhAJIQQpYzdalb69qXVN9+YX+6mJa35Hcje47llS/jhh/yXWQhHk4BICCFKkZgYGD1aP+tzpUowdy48+qiVJzlzRuuwk8mDrGI1D+Z4yPvv56OwQhQi6VQthBClhGl5jaxLYFy8CI89BhMnWnGSd9/NFgyVd7meazDk6gpt2+ajwEIUIgmIhBCiFDAaYdiw3PO8+642SsuiGze0ce2Zo6YZM9gUp7iW4ZnntXfssK28QhQ2CYiEEKIUyH15jTtGjNACGJ01a8DLS5928iS8+CKnTll3/X//tS6fEM4iAZEQQpQCGzdal+/CBa3DNaCtpNq6tTY8zMS0wmpoKAC7dll33pEjtSY7IYoq6VQthBAlRG6jx7Zts/48K1dCx8BD0KiRfsfOnXDPPboka2eyu35di6UcNXGizEkkCkoCIiGEKAEsjR4LCYEPP4QKFWDLFuvPVXPW8zDr4zsJVapobV5u2b8y6tSxrZzR0fDww/YNVnK699mzZdZqYT27zFSdlJSEn5+fHYpTNMlM1UKIoiwm5k5LVkFU5BKXqKRP/O9/YdCgHI9JSwNPTwv9jnIRF2e/CRVzunfTumaylEfpZsv3t819iN555x2+/vpr8+vHHnsMf39/qlatyr59+2wvrRBCiHwzGrXakYIGQ0/xf9mCoW0/XMk1GAJtTbKxY2271rlztpbOstzu3ZQWHW1bsCZKL5sDok8//ZTQ/3Wmi42NJTY2lrVr19KtWzcmTJhg9wIKIYTI2dat2ecVsoUbt7lERf6Pp81p7zIeA4pTV/2sOsfMmTBhArhY+Y1y9Gg+CmpBXveuFJw6lamTuBC5sDkgSkhIMAdEq1ev5rHHHqNLly5MnDiR3377ze4FFEIIkbOC1La0Yyu3caciV8xpdTnCRN4FtM7J1po5E65dA2t6FSxYYJ9aG2vv3V41UqJkszkgqlChAqf+N/HEunXriIiIAEAphVHqJYUQolAdOZK/436gJ1u5z/x6C+0xkMFR6mIwaKPq27e37ZzlysG4cXnnO33aPrU21gZstgR2ovSyeZRZVFQUAwYMoE6dOly6dIlu3boBsGfPHmrXrm33AgohhMjOaISBAyFTl06rhHGCE9TQpXVhPbF00aXNmpW/kWDWjjqzR61N+/baaLIzZyz3IzIYtP22BnaidLK5hujDDz9k1KhRNGzYkNjYWLy9vQE4d+4cI0aMsHsBhRBC6MXEQFCQ7cHQq7yuC4aMuFCWm9mCoa+/zv/IrMKstXF11YbWw51RZSam1/kN7ETpY/Ow+y1bttC2bVvcssxHkZ6ezo4dO7jvvvtyOLL4kmH3QoiiwrRAqy28uMY1yuvSRjOLjxhtMX9BhsUbjVC9et61NvHx9gtULM1DFBqqBUMy5L50s+X72+aAyNXVlXPnzhEYGKhLv3TpEoGBgSWyH5EEREKIosAUbNgyquwRYohBH0EFc45EgnM8ZulS6N8/n4XkztxAoA+KHDk3kMxULSxx6DxESikMWesm0QIir6yL/wkhhLAbW4bYG8jgIA11wdDnDMaAyjUYgoI3Z0VFaUFP1ar69JAQx02U6Oqq1Wr176/9lGBI2MrqTtVR//sEGwwGnnjiCTw8PMz7jEYjf/75J23btrV/CYUQQgDw3nvW5WvGXvbSQpfWkt3soWWex7q52acTclSUtkSHI2ptpDZIOILVAZGvry+g1RCVL1+ecuXKmfe5u7tzzz338Mwzz9i/hEI
IIZg4EdasyTvfQoYylP+YX/9NHRpwmAysixiqVbNfcGGqtbEnWbdMOIrVAdGiRYsAqF69OuPHj5fmMSGEKCRpafD++7nnCeA85wnSpT3KN3zLozZdq0uXvPM4S07rlp05o6XLumWiIOyyuGtJJ52qhRAFVZBmnvvv10Z+5WQEn/AJo3Rp5UnJNrLMGjduaBMsFjV5dSh3xOg1Ufw5tFN1YmIigwYNokqVKri5ueHq6qrbhBBC3GE0wrRp4OcHnTrBgAHaz7AwrcYjLxMn5hwMuZPKLTx0wdDrvIoBla9g6N57i2YwBLJumXA8m2eqfuKJJzh58iSvvvoqlStXtjjiTAghhBbwDBmirfGV1Zkz2nxCK1bk3MyTlpZzR+r72cAGInRpNTlGPDXzXd6NG/N9qMPJumXC0WwOiLZt28bWrVtp3ry5A4ojhBAlg7UTKA4bpo3GslTB/uyzliY3VGygM/dzp9poLV3pzo9Awf5A3bHD/p2g7UXWLROOZnOTWWhoKNLtSAghcmY0aiOhrHHpEmzaZPkcy5fr02pzFIWLLhjqwCa6s5aCBkNQtGtXTOuW5dQokd8FaYUwsTkgmjVrFpMmTeLEiRMOKI4QQhR/tkygCJabqrZuhevX77x+m8kcpa75dTI+uJPKFjoUoKR6Rbl2RdYtE45mc5NZ3759uXHjBrVq1cLT05MyZcro9l++fNluhRNCiOLo1KmC5zfV1viQTDJ+un3D+IwFDMtf4XJQHFaFN82AbWkeIlm3TBSUzQHRrFmzHFAMIYQoOWbOtC1/errWRJa5dqNyZejPUpYyUJe3Ehe4RCU7lFJv9uziUbviyBmwRekm8xBZQeYhEkJYKy0NMq1sZDV/f5g//3+1HEYjqmZNDCdPmvfP4zlGMM9+Bc0kOho+/NAhpxbCqRw6DxHAsWPHeOWVV+jfvz/nz58HYO3atRw8eDA/pxNCiBJj7tz8HXfpkjYqLe6dX8HNTRcMNWa/w4IhgJ49HXZqIYoNmwOizZs306RJE3bt2kVMTAzX/jfBxr59+5gyZYrdCyiEEMXJsWP5P/Yr+tFpUhvz6z00xwUjB2lsh5IJIXJjc0A0adIk3nzzTWJjY3F3dzen33///fzyyy92LZwQQhQ3tWrZfkwVzqAw0I+vzWn731xJS/ag8leRb5OEBIdfQogiz+b/afv37+eRRx7Jlh4YGMjFixftUighhCiuRoywLf843uMMIbq0iSOu0XDSQ1SpYseC5eLChcK5jhBFmc2jzPz8/Dh37hw1atTQpe/Zs4eqVavarWBCCFEcubpqm9GYe76y3OQmnrq0ybzNDCZjmAf/XoQrVxxY0EwCAgrnOkIUZTbXEPXr148XX3yRhIQEDAYDGRkZbN++nfHjxzN48GBHlFEIIYqNrVvzDoa68WO2YCiUk8xgMqAt1/HNN3DzpqNKqVeQfk9ClBQ2B0Rvv/029evXJzQ0lGvXrtGwYUPuu+8+2rZtyyuvvOKIMgohRLGR+/IXil3czY/0MKfE8AgGFKcJdXjZcjJ1qrb2mhClWb7nITp16hT79+/n2rVrtGjRgjp16ti7bEWGzEMkhLDWpk3QqVP29AYc4hCNdGnh7OAXwgunYLkwGLTZnuPji+8Eh0ajTNYosnP4PESgLfLavXt3evfuzfXr17lSWI3dQgjhIEajFtB89ZX2M6+mL0tMi5BmNovRumDoLJVx43ahBEMGA/Ttm3sepbTlQ7ZudXhxHCImBqpX1wLRAQO0n9WrS62XsI3NAVF0dDT/93//B4DRaKRDhw60bNmS0NBQNllaslkIIYoBe32pZl6EtAKXURgYzUfm/YP5nKqcxWj7mJZcTZuWPRALDdXW/nr4YevOUZRXu89JTAz06ZN9Md0zZ7R0CYqEtWwOiL799luaNWsGwA8//MDx48f566+/GDNmDC+//LJN55o3bx5NmzbFx8cHHx8fwsPDWbt2rXn/rVu3GDlyJP7+/nh7e9O7d28SExN
15zh58iQ9evTA09OTwMBAJkyYQHp6ui7Ppk2baNmyJR4eHtSuXZvFixfbettCiBIsJkabJTrrl+rp0/n7Uo2Kgt9H/IfL+OvSK3CZL7D/4JPQUHj5ZThxAuLiYOlS7Wd8vFYWa1exL8qr3VtiNGoLvVrq+GFKi47OX02fKIWUjTw8PNSpU6eUUko988wzavTo0UoppY4fP67Kly9v07lWrVql1qxZo/7++2915MgR9dJLL6kyZcqoAwcOKKWUeu6551RoaKjasGGD+v3339U999yj2rZtaz4+PT1dNW7cWEVERKg9e/aoH3/8UVWqVElNnjzZnOf48ePK09NTjR07Vh06dEjNmTNHubq6qnXr1lldzuTkZAWo5ORkm+5PCFH0pacr5eWllPYVankLDdXyWSUtTamKFXUneI+xuZ6/oNuKFXnfY0iIUgaD5eMNBhvvsYiIi7Pu+cTFObukwlls+f62OSCqVq2aWr9+vUpPT1ehoaFq9erVSimlDhw4oPz8/GwvbRYVKlRQCxcuVElJSapMmTJq+fLl5n2HDx9WgNq5c6dSSqkff/xRubi4qISEBHOeefPmKR8fH5WamqqUUmrixImqUaNGumv07dtXRUZGWl0mCYiEKLnuvdeOX6rbtmU7sC5/OTQYmjLFuvv85pucgyGDIe+gqihautS6Z7R0qbNLKpzFlu9vm5vMnnzySR577DEaN26MwWAgIiICgF27dlG/fv1811QZjUaWLVvG9evXCQ8PZ/fu3dy+fdt8foD69etTrVo1du7cCcDOnTtp0qQJQUFB5jyRkZGkpKSYF5rduXOn7hymPKZzWJKamkpKSopuE0KUPN98A9u3W5f3zJk8Mjz0ELRrd+f1vfdivJ3BybL18l0+a8yZk3eTXkwMjB1reV9IiNbPKCrK/mVztJLaFCicw+ZefVOnTqVx48acOnWKRx99FA8PDwBcXV2ZNGmSzQXYv38/4eHh3Lp1C29vb7777jsaNmzI3r17cXd3x8/PT5c/KCiIhP8tvJOQkKALhkz7Tftyy5OSksLNmzcpV65ctjJNnz6dadOm2XwvQojiw2iEoUOtz5/T8hbG4//iWqu6Lm3fzPUcCulC4GZtlJcjXb6s9XPKKagxdTrOaYKVDz4onsEQ3BnRd+aM5fszTSfQvn3hl00UP/ka5tCnTx9A6/RsMmTIkHwVoF69euzdu5fk5GS+/fZbhgwZwubNm/N1LnuZPHkyYzP9OZWSkkJoqPMmTRNC2N/WrXDtmvX5LS1vcaj/GzRc9pr5dQYGPLlB6sSydiihbaKjoWdP2LHjzlw8bdvm3OkYtIBh7Fh45JHiOWePaURfnz7avWS+T1MgOmtW8bw3UfhsbjIzGo288cYbVK1aFW9vb44fPw7Aq6++ah6Obwt3d3dq165Nq1atmD59Os2aNWP27NkEBweTlpZGUlKSLn9iYiLBwcEABAcHZxt1ZnqdVx4fHx+LtUMAHh4e5pFvpk0IUbL8+69t+XVLNV67BgaDLhiK5kNcySCVwg+GTPMIVa2qnzagatXsI+csHVdc5x8CrXbr22+zvD8U76ZA4Rw2B0RvvfUWixcvZubMmbi7u5vTGzduzMKFCwtcoIyMDFJTU2nVqhVlypRhw4YN5n1Hjhzh5MmThIdrk5mFh4ezf/9+zp8/b84TGxuLj48PDRs2NOfJfA5THtM5hBCl04oVtuU3N7t89x2UL6/bV5mzzCbaLuUqiIsXc3+dk+I4/1BmUVE5TzkghNVs7bFdq1Yt9fPPPyullPL29lbHjh1TSmkjwGwdZTZp0iS1efNmFR8fr/788081adIkZTAY1E8//aSU0obdV6tWTW3cuFH9/vvvKjw8XIWHh5uPNw2779Kli9q7d69at26dCggIsDjsfsKECerw4cPqk08+kWH3QghVu7b1I7k8PZVSRqNSDRvqdnzOIIeOICusTYali5LKlu9vm/sQnTlzhtq1a2dLz8jI4Pbt2zad6/z
58wwePJhz587h6+tL06ZNWb9+PQ888AAAH374IS4uLvTu3ZvU1FQiIyOZO3eu+XhXV1dWr17N8OHDCQ8Px8vLiyFDhvD666+b89SoUYM1a9YwZswYZs+eTUhICAsXLiQyMtLWWxdClCCpqdbnnRi5D1yb69Ja8Tt/0Mq+hSpk0ulYiDtsXty1VatWjBkzhscff5zy5cuzb98+atasyeuvv05sbCxbi3NjdA5kcVchSp7gYMjSvdCiBTzN09zpH/kPtajHETIo3j11TZ2OpZ+NKMls+f62uYbotddeY8iQIZw5c4aMjAxiYmI4cuQI//3vf1m9enW+Cy2EEIUlJibvYCiA85xHP2XHY3zNch5zYMnyVr48XL1653VAQM5TAmSWNV9IiDYCS4IhITQ21xABbN26lddff519+/Zx7do1WrZsyWuvvUaXLl0cUUankxoiIUoOo1FbtDW30VfDmctcRurSAj2SuZDq/P//3t766QKqVoVbt+DSpZyPCQ2Ff/7RD8lv316Go4uSz2E1ROnp6bz99ts89dRTxMbGFqiQQgjhaEajNqQ8cxCwdWvOwZA7qSTjS1nudDB6k5d5lTfBhj5HjpR17qSzZ3OeZ8ikXz9wd4eOHR1WLKew9P5KkCfyy6Zh925ubsycOTPbavJCCOEMRiNs2gRffaX9zLyqeUyMVjOSeV6e0FBYudLyuTqxkVTK6oKhmhzTgqEizJo6/mXLSt6K7zExWk1f5ve3evW8lzERIic2z0PUuXNnp88kLYQQMTEQFqb/QgwL09K//hp6984+v865c1q/GT3FBu5nI53NKevpgoEM4qnp6NsoFMV98sWsTMuRZK3pO3NGS5egSOSHzZ2qu3XrxqRJk9i/fz+tWrXCy8tLt/+hhx6yW+GEEMKSmBgt4MnqzBnL6TmpyTGOoZ9GpCNxbKZjwQpoZ1mXpciP4j75oonRmPNyJEppzyo6Gh5+WJrPhG1s7lTt4pJzpZLBYMBY0uplkU7VQhQlRiMEBeXeidgab/ESLzHd/Poq3vhzidu453KUc0ydqm0FERdXMvoQbdqk1QbmpaTcrygYW76/bW4yy8jIyHEricGQEKJo2bSpYMGQD8koDLpg6Dnm4cNVi8FQZKRjaxoqVoRp02DcuOzXcXWFCRPglVe0YfKmuYNsYTBofadKyuSL1tZ0lZQaMVF4bA6IhBDCmT75JP/H9uMrkvHTpQVwns94LsdjunaFwMD8XzM3lSrBp59C48bwwQfZOz4bjfDee1pH8NmztbSsQVHm1zntK0krvleubN98QpjkKyDasGEDPXv2pFatWtSqVYuePXvy888/27tsQgihYzRqa6vaygUj8VTnKwaY0z7lWQwoLhKQ83EuMGIE1KuXn9Lm7dIleOwxGDYs9z5Cpj4xllZ1r1pVW6h2xYrSseJ7+/a515aVtBoxUXhsDojmzp1L165dKV++PKNHj2b06NH4+PjQvXt3PinIn25CCJGHadNsP6YVv2PEjer8a05rwp8M59M8jx03Tpu/p1Yt269rDVMQlFsToFL6UWJZAyfT69Ky4rura961ZSWpRkwUIltXjq1ataqaM2dOtvSPP/5YValSxdbTFQuy2r0QzpeerpSbm22ruC+hvy5hD82UAaNVx7q7a9dUSqmHHsrfKvJDhij1yiv2WZE+OlopgyF7usGgbStWOPXtKXQrVigVEqJ/FqGhpe85iNzZ8v1tcw1RUlISXbt2zZbepUsXkpOT7RCiCSFEdhs3grVzwlbmLAoDA/jKnPYw39OCvSgrK8bT0u7Uyty4YWtpNZGR0LBh/o7N6ssvcx5qDlqzWmka11JaasRE4bE5IHrooYf4zkIj/sqVK+nZs6ddCiWEEFnNn29dvrG8z1n0nWm8uMYqHrb5mqaRSq1b23wooHXsLWjnXoNBW5j14sWc82RtVistXF21ofX9+2s/pZlMFITNEzM2bNiQt956i02bNhEeHg7AL7/8wvbt2xk3bhw
fffSROe8LL7xgv5IKIUq1dety31+Wm9zEU5f2Em8xnZfyfU3T6LJOnWDGDOuPMxi0jr+mjr0hIdqkkbZOrmjqEzNwoKUZtrNz9lBzWVtMFGc2T8xYo0YN605sMHD8+PF8FaqokYkZhXA+V1fIyLC8rytrWUt3XVooJzlNaIGuWbUqfPSRtsJ8ZKT1xxkM+tFdpqUmwLagKDRUC4QqViz6kxHGxGgzSGdeTiMkROsALc1Ywlls+f62OSAqjSQgEsL5LAdEip2Ecw+7zCnf0YsoLI/N9/eHUaOsH61mqqGJitKGtVvDFMRkDQIsBQyWBATAhx9qwZiphsWa2bn9/SEx0Tk1MqaAL+u3ien5lbSh/6L4cOhM1UII4QweHvrX9TmMwkUXDLVlu8VgyNtbC4ISE+Gee6y/pukL/qefrMs/aFDOHXujomDx4rzPceGCFgwVlz4xea0tBqWvw7coniQgEkIUGUajtjTHV19pPzN/iYaE3Pn3h0RzmDvDtxIIwo3b7KStxfNev67NBu3qCkuW2FYmpeDqVevyDhmSexBz/rx158naF2jr1ryXK7l0ybpO1bk94/zYujX3Wq/S2uFbFD8SEAkhioTlyyE4WOsrM2CA9rN6da05BrQlLCpwGYWBaGabjxvCYiqTgDGPMSKmWopr1/JXPm/v3Pf7++fdfye/y07Ya/2umBjtmeb0jPND1hYTJYUEREIIp5s4UVvCIuvQ8tOntb4pMTHQ88IiLuOv21+By/yXIXmeP3MtRbt2+SvjhAm57//007ybuPK77IQ91u8y9fPJWptz5sydZ5wfsraYKCkkIBJCONW338K77+a831XdpsOjAbg8/ZQ57UOiMaBIooJN1zp3Dp5/XlujzFqmIOXll7WO1Zmb7jIbMybvoMLVVVvE1VJ/m9yWnSjo+l2O7Ocja4uJksLmgGjdunVs27bN/PqTTz6hefPmDBgwgCtXrti1cEKIks1ohKFDc97flu3cxh3/jDtVRz/NPsw03w/zdb3KlbW1ycaNsy5/1iAlKkoLaCyxpqYlJgbGjrW8L7eFWAu6fpcj+/nI2mKipLA5IJowYQIpKSkA7N+/n3HjxtG9e3fi4+MZm9P/dCGEsGDTJvjfr5NsvqMX27nTvnWhTjhkZNDlhfpcugQ//wyvvKJt69fbVksxc6bWBJZXTVHWIMVozDmgyaumJacmK5MPPsh9aHpUlOXV7q1Z0d7R/XwKUjYhigqb5yHy9vbmwIEDVK9enalTp3LgwAG+/fZb/vjjD7p3705CQoKjyuo0Mg+REI7x8svw9tv6tFBOcpIwXVpX1jLh56507pzzuXKa/DC3uXDS0mDuXDh2TFvR/tlnYdeunGda3rQpfxMkGo0QFqbVIllimtk6Pj7vmpT8zAad33LbSmaqFkWNLd/fNi/d4e7uzo3/rXT4888/M3jwYAAqVqxorjkSQghrnDypf/0Sb/EWr+jSynGDW5RjbB79W0y1FJZmS7Y0USJozWfR0fq03AKC/Na0vPVWzsEQ6Jus8gpITOt32cLUzyen5UOyLjWSX/kpmxBFhc0BUbt27Rg7diz33nsvv/76K19//TUAf//9NyE59TYUQggLTL8yPLnOdfTj2sfwAbMYY369dSt06ZL7+aKi4OGHHVdLkZ8RVTExMGWKdcc5ami6qZ9Pnz5a8GOpBk36+YjSzuY+RB9//DFubm58++23zJs3j6r/azReu3YtXbt2tXsBhRAl1+nT8DDfZwuGKnNWFwzZwpEroNs6oso0ustajhyaLv18hMidrGVmBelDJIT9vTghg8ffa0YTDpjTvmQgg/jSYv6ffybXPkSFJa+FWr/5Bh59VPu3tX13QAukrOlDVFDSz0eUJg7tQwRw7NgxFi1axLFjx5g9ezaBgYGsXbuWatWq0ahRo3wVWghResS+/yfvvNdMl9aa39hNa4v5y5YtOn1TcuqrZDJ27J0h+rY0gRVWk5X08xHCMpubzDZv3kyTJk3
YtWsXMTExXPvfPPj79u1jirUN5UKIUss4dBgPjL8TDB2jJq6k5xgMAbz4YtGqxYiK0laktyTzfETWNoFNmyZNVkI4m81NZuHh4Tz66KOMHTuW8uXLs2/fPmrWrMmvv/5KVFQUp3Ob/auYkiYzIQrOeO48rlWCdGl9WcY39M31OG9vSEoqWgGR0aitAZbbr7uQkDvD+XMa3WXKd+JE0bo/IUoKW76/ba4h2r9/P4888ki29MDAQC5mXYhICFEiFHSF9N3PfJotGPIhOc9gCODzz4tesJDXzM+g7Z8xI/dZnA0GbX9Ruz8hSiObAyI/Pz/OWWgY37Nnj3nEmRCi5CjQCulpaaS6edJq4XBz0lu8hAHFVfKubY2KKppNSdb2DTL1IpDRXUIUfTZ3qu7Xrx8vvvgiy5cvx2AwkJGRwfbt2xk/frx5kkYhRMlgGlGVtbnHtAp9rl/ocXFw//14ZEqqxT8cp5bV1x8xwuYiFwpbhsdHR2ujxxw5P5IQouBs7kOUlpbGyJEjWbx4MUajETc3N4xGIwMGDGDx4sW4lsD/4dKHSJRG1vST8feHxMQsX+xKaTMo/vyzOSmWCLrwE5DDBD4WFMW+QybWPJvMCrokhhAifxzah8jd3Z0FCxZw7NgxVq9ezZdffslff/3FF198USKDISFKK2v6yVy6pC1LYXbsmLZiaqZgqBMb6UIstgRDAOPHF81gCPQrvFvDUTNQCyHsJ1/zEAFUq1aNatWq2bMsQogiJLe1tzKbMQPuuQeqzn2ZRivvrNR6y9UTH+MVbuOer+svWABNmhTdPjZRUdpweWtmG3HkDNRCCPuwqsls7NixVp/wgw8+KFCBiiJpMhOlhWn19/XrYd06644pTwop+OrS9jwzlw9vDeeLL/JfltxWqS8q8mo6s2UVeyGE/dl9puo9e/ZYdWFDTgv8CCGKvIkT4d13bTumL8tYRn9dWiCJXFwYSO/eBSuPUlpAER2tdUguigFF5kVTQRZNFaI4k7XMrCA1RKKkMdUEmSYOjI+Hjz6y/ngXjBylDjWJN6fN5xmeZb4DSlv0OyXHxGRfyiM0VAuGimrtlhClgcPXMhNCFF8TJsAHH0BGRv6Ob8nubMtsNGUf+2lqh9JZVtQ7JUdFybB6IYo7CYiEKEW6dbO+b5AlX/A4j7PE/PpPmtCcvSjbB6zapDh0SpZFU4Uo3iQgEqKU8PSEmzfzd2ww5zhHFV1aL75jJb0KXrBcmDolt2/v0MuUaEaj1FwJYQ3H/lknhCgSDIb8B0PRfJgtGPLiWqEEQyCdkguiQMuuCFHKSEAkRAnn6Zm/4zy4hcLAh9yZduNl3sSA4gZediqdplOn7Gt9VagAU6dqfXOE7UzLrmSdEuDMGS1dgiIh9JwaEE2fPp277rqL8uXLExgYSK9evThy5Iguz61btxg5ciT+/v54e3vTu3dvEhMTdXlOnjxJjx498PT0JDAwkAkTJpCenq7Ls2nTJlq2bImHhwe1a9dm8eLFjr49IZwuISF/NUORrOMW5XRp1fiXt3nZTiXTe+YZ+PdfbaLDihW1tMuXtUkPpUbDdkajNurN0hhiU1p0tJZPCKFxakC0efNmRo4cyS+//EJsbCy3b9+mS5cuXL9+3ZxnzJgx/PDDDyxfvpzNmzdz9uxZojKNYzUajfTo0YO0tDR27NjB559/zuLFi3nttdfMeeLj4+nRowedOnVi7969REdH8/TTT7N+/fpCvV8hCluzZrYeodhBOOvoZk75nocxoDiF42amr1wZVq7UaoQuX9bvkxoN2+W17IpScOqUlk8I8T+qCDl//rwC1ObNm5VSSiUlJakyZcqo5cuXm/McPnxYAWrnzp1KKaV+/PFH5eLiohISEsx55s2bp3x8fFRqaqpSSqmJEyeqRo0a6a7Vt29fFRkZaVW5kpOTFaCSk5MLdH9CFDZXV6W0r7+8t3oczpbYlm1WH5/fLTRUqdRUpUJCcs5jMGj50tO1+0pPVyouTqm
lS7WfpnShWbrUume/dKmzSyqEY9ny/V2k+hAlJycDUPF/dea7d+/m9u3bREREmPPUr1+fatWqsXPnTgB27txJkyZNCAoKMueJjIwkJSWFgwcPmvNkPocpj+kcWaWmppKSkqLbhCjqjEbYsAFefVXbNmywvknkA8bwFw3Mr88TgBu32cG9DirtHbNmwY4d1tdoSEfhvFk7TUFxmM5AiMJSZAKijIwMoqOjuffee2ncuDEACQkJuLu74+fnp8sbFBREQkKCOU/mYMi037QvtzwpKSnctNDBYvr06fj6+pq30NBQu9yjEI4SEwOVKkFEBLz5prZl+RvAIj+uoDAwhlnmtCdYRBDnMRbCrBxRUdpm7cSLK1dKR2FrtG+vTVeQ02pKBoM2k7ZMZyDEHUUmIBo5ciQHDhxg2bJlzi4KkydPJjk52bydOnXK2UUSIkcxMdC7NyQl2XbcYD7nChV1aRW5xOc8Ybey5aVhQ+2ntTUVX34pHYWtYVpjDbIHRTKdgRCWFYmAaNSoUaxevZq4uDhCQkLM6cHBwaSlpZGU5Td9YmIiwcHB5jxZR52ZXueVx8fHh3Ll9CNpADw8PPDx8dFtQhQlaWna8hsPPYTNi6i6kk4CQbrAZxajMaCyBUiO5uenBTDW1GgEBMDFizmfSzoK60VFwbffZp/OICRES5c11oTQc2pApJRi1KhRfPfdd2zcuJEaNWro9rdq1YoyZcqwYcMGc9qRI0c4efIk4eHhAISHh7N//37Onz9vzhMbG4uPjw8N//fnZ3h4uO4cpjymcwhRnEycCB4eMG4c/PCDbceGs4N0yhDEnf8vDTikazIrTOPHa/1/Vq7Mu0Zj4EDrzlnU1z0rTFFRcOKEtjju0qXaz/h4CYaEsMjxfbxzNnz4cOXr66s2bdqkzp07Z95u3LhhzvPcc8+patWqqY0bN6rff/9dhYeHq/DwcPP+9PR01bhxY9WlSxe1d+9etW7dOhUQEKAmT55sznP8+HHl6empJkyYoA4fPqw++eQT5erqqtatW2dVOWWUmSgqJkzI/2iuGHrpEnZwj4IMh40e8/S0Lp/BoG0rVmhb1tFmoaFaelycdeeLi3P2uySEKCps+f52akAEWNwWLVpkznPz5k01YsQIVaFCBeXp6akeeeQRde7cOd15Tpw4obp166bKlSunKlWqpMaNG6du376tyxMXF6eaN2+u3N3dVc2aNXXXyIsERKIoSE3N57B2/s2W2JUfHRYImbbRoy0HODkFRaZh9TkNqU9P185lMFg3NF8IIWz5/jYoZamLosgsJSUFX19fkpOTpT+RcJqyZSE11bZjJvN2ttmly3Ej2yzUjhAXp63+bjTCnDkwZoz1x+TEtBwF3OlIDXea1aRvjBAiM1u+v4tEp2ohRO4uXLAtGPLkOgqDLhgay/sYUIUSDAUE3BnS7eoKWWa9yFFe/X+ko7AQwlEcP9GIEMJmRqM2WurMGS0YmjLF+mMfYmW2leircCbbivWONHeufki3PScKjIrSFnzdulULoCpX1oIvGUIuhCgICYiEKGJiYrSFOXObudkSAxnspTlN2W9OW0p/BrLUziXM3YQJd5q1TEzD6s+c0Td1mRgM2n5rJwp0dc29aU0IIWwlAZEQRYhpkkVbNWY/+2mqS7uLX/mdu+xUsrz5+sKCBfDoo9n3mSYK7NNHC34s9f+RiQKFEM4kfYiEKCIuX85fMPQpz+qCoXiq40q6XYMhF5ecJ00Erc/Q+fOWgyET6f8jhCjKpIZIiCKgZk1twjxbVOICFwjUpfVnKcvob8eSaTIytJ851e58+im4u+d9npLU/8fUz6u434cQQiMBkRBO5uUFN27YdswwPuMzntOl+ZJECr52LJledLRWk5O5b1NIiNbUZUvtTkno/2Opn1dIiNYsKDVdQhRP0mQmhBN17WpbMFSGNK7irQuG3mYyBpRDgyHQanZkGYg7cyFl7fR+5oyWHhPjnHIJIQpGJma0gkzMKBxh2DC
tE7K1OrCJTXTSpdXmKMeoXeCyVKoEly7lPgIsPl6ahIxGbe21nEYAyrMSomiRiRmFKIJu3tSCoNBQ7YvT+mBIsZ4uumDoZzpjIMMuwVBoqDZvEOS8sKqMANNs3Zr7dAhKwalTWj4hRPEifYiEKAS9emkrutuqBsc5Ti1d2v1sII777VIug+FOHyBXV8v9YmztI1SS5TWTtq35hBBFhwREQjhYfoOh13mVV3nT/PoG5ajAFdLwyFc5KlbUhvabhIbqg52SNALMUew547YQomiRPkRWkD5EIr9u3gRPT9uOKU9Ktg7SI/iEeYwoUFl+/lkLbiTYyT9TH6K8ZtyWPkRCFA22fH9LDZEQDmRrMPQYX/M1/XRpgSRmm28oP1avhg8/zD2PI+bWKUnz9ciM20KUXNKpWgg7Mhph0yb4v//LfWbnrFww8g+1dMHQAp7GgLJLMATaF3VuQ8JjYrTaj06dYMAA7Wf16gUbRu6IczqbzLgtRMkkTWZWkCYzYY2YGHj+eTh71rbjWrKb3bTWpTVjL3/SzI6l04SGWm7OMc2tk/W3gSmoy88XvSPOWZSUpJovIUoqW76/JSCyggREIi9ffw39+uWdL6vPGcxgvjC/PkAjmvInyoGVt3Fx+pmiHTG3jszXI4QoCmQeIiEK0fPP2x4MBXMOhUEXDD1CDE04kO9gyJq1xCD7kHBHzK0j8/UIIYob6VQtRAEEB0Niom3HjGYWsxijS/PmKtfxzlcZmjaFDz7Q/h0RkXf+rEPCHTG3jszXI4QobiQgEiKfbA2GPLjFLcrp0l7ldd7k1XyXwWCA337TaoeMRq0ZKq8h4e3b69OPHrXuWrbMrSPz9QghihtpMhMiH154wbZgqAvrswVDYZwoUDAE8Nhjd5rKTEPCwfolOIxG65YQsRRI5aZ9e+2YnEbaGQxaB29bzimEEI4kAZEQNjAaYf16mDPH2iMU27iX9XQ1p6ziQQwoThJW4PI8+KD+ta1DwvPq62PyzDO2dX7OT3AmhBDOJAGREHkwGmHVKm1FeDc36No172MA6nIEhQv3ssOc1o6tPMwqu5XtwoXsaVFRcOKENpps6VLtZ3y85SHu1vbhqVPH9rLJfD1CiOJE+hAJkYuYGOjd2/bj3mMc4/jA/Poi/lTmHOmUsWPpICDAcrqrq35ofU4c3ddH1kcTQhQXEhAJkYP8BEN+XOEKFXVpT/F/LOIpO5bsjr/+0mbGzm+QYerrY2tHbFtYG5wJIYQzSZOZEBZcvmx7MDSI/2YLhipyyWHBEMCbbxZsOQzp6yOEEBoJiITIonZt8Pe3Pr8r6ZwjmP8yxJw2mxcwoLIFSI5y5oy2TEZ+giLp6yOEELJ0h1Vk6Y7S4eZNreP0jRvWH3MPO9lJW11aQw5ymIZ2Ll3eCrochqzNJYQoaWTpDiFs1KsXeHraFgx9S29dMLSLuzGQ4ZRgCAq+HIapr0///tpPCYaEEKWJdKoWpd6DD8Lq1dbnD+EUp6imS+vOGtbS3c4lyx9ZDkMIIWwnNUSiVDIaYcMGqFLFtmBoEtOzBUPluFFkgiGQ5TCEECI/JCASpU5MjNZpOiLC+toUT66jMDCdl8xp43kXAyrbkhyO5O8vy2EIIYQjSEAkShXT3ELJydYf8yCrsq1EX5XTvM94O5fuDhcL/zP9/eGp/43glyHyQghhXxIQiVIjLc3WuYUUe2jOKh42p3xFPwwozlI1l+Pyr3dvmDbN8iSJly/De+/B+PEyRF4IIexNOlWLUsHWWacbcYADNNGl3c0ufuNuO5fsDn9/be2xWrUsB0RKaTVBy5bBsWOwY4cMkRdCCHuRgEiUeF99BQMGWJ+/B6tZzZ1l5P+lGjU5TgaOjTjmz9eCnNxWnzcNrd+xQ5bDEEIIe5ImM1GitWtnfTBUjhvMZbguGBrAEqrzr8ODoZ49teYuazt5y9B6IYSwL6khEiXSzZvaRIvWasluljCQ+hwB4APG8BJvk0pZB5VQb9cubSoAR68
+L4QQwjKpIRIlSloaNGxofTDkgpFJTOcX7qE+RzhDFSKIZRwfFFowBHDhgjbDtGn1eWuG1huN2kr3X32l/TQaC624QghR4khAJEqMF14ADw84fNi6/GGcYBMdmc5LlCGd5fShCfvZQIRjC5qDc+f0q89nlXlo/cqV2gr3nTppTYIFWfFeCCGEBESihPD1hTlzrM2tGMiX7KMZ7dnGVbwZwmIe45tCW53eksBAraZn61YoXz77/ooVtaH1oK1sn7XzdUFWvBdCiNJO+hCJYi8oCFJSrMvrxxXmMZx+fA3AdtoyiC+Ip6YDS5g3d3cYMkQLanJy6RJkZMCYMbkPy4+OhocflmH4QghhC6khEsXWtWtaf5vz563L35E4/qQp/fiadFx5hTfowGanB0Og9X3KLRgCLdgZMcK6Yfn5XfFeCCFKK6khEsVS8+awb591ed1J5U1eYRzv44Lib+rwOF86dJJFR1BK63xtDRmWL4QQtpGASBQ7rq5a05E1GnKQJQykOVr09BnDGMf72dYmK2lkWL4QQthGmsxEsXHtmtZsZE0wZCCD5/mI3bSiOfu4QCUeYiXP8VmxD4YqVZIV74UQwt4kIBLFQqNGlkdeWRLMOX6kOx8xmrKk8iPdaMJ+fuAhxxbSwUzBzty5d15n3Q+y4r0QQuSHUwOiLVu28OCDD1KlShUMBgPff/+9br9Sitdee43KlStTrlw5IiIiOHr0qC7P5cuXGThwID4+Pvj5+TF06FCuXbumy/Pnn3/Svn17ypYtS2hoKDNnznT0rQk7uXlT+6I/dMi6/L34jv00oSvruUlZRvAJPVhDIsGOLWgeQkKyr1Bvi8zBzqOPasPvZcV7IYSwH6cGRNevX6dZs2Z88sknFvfPnDmTjz76iE8//ZRdu3bh5eVFZGQkt27dMucZOHAgBw8eJDY2ltWrV7NlyxaGDRtm3p+SkkKXLl0ICwtj9+7dvPvuu0ydOpX58+c7/P5EwfTqZf2M015cYwFP8x1RVOISf9CClvzBPEYAObQvFaIPP4SPPtICm5yau3KTNdiJioITJyAuDpYu1X7Gx0swJIQQ+aaKCEB999135tcZGRkqODhYvfvuu+a0pKQk5eHhob766iullFKHDh1SgPrtt9/MedauXasMBoM6c+aMUkqpuXPnqgoVKqjU1FRznhdffFHVq1fP6rIlJycrQCUnJ+f39oSNatVSShtXlffWhp3qKNoBRgxqOi+qMqRafXxhbHFx2n2tWKFUSIj1x/XsqR2bnu7Md0MIIYonW76/i2wfovj4eBISEoiIuLOMgq+vL23atGHnzp0A7Ny5Ez8/P1q3bm3OExERgYuLC7t27TLnue+++3B3dzfniYyM5MiRI1y5csXitVNTU0lJSdFtovAYDHDsWN75XEnnNaaxjXbU5hj/Uo1OxDGZGdzGPe8TFCLTMPioKFi82Prjdu7UOkhLnyAhhHCsIhsQJSQkABAUFKRLDwoKMu9LSEggMDBQt9/NzY2KFSvq8lg6R+ZrZDV9+nR8fX3NW2hoaMFvSOQpLc365qSaHGMr7ZnGVNwwsoQBNGMfW+jg2ELmU+Zh8NZOJAna7NSbNtm9OEIIIbIosgGRM02ePJnk5GTzdurUKWcXqcTr3VtbmDVviif5D/toRji/kIQvA1jC4ywhGT8HlzJ/XF2hTZs7K9MnJtp2vAREQgjheEV2YsbgYG1UUGJiIpUz/XmdmJhI8+bNzXnOZ/lzOz09ncuXL5uPDw4OJjHLN5DptSlPVh4eHnhY9+0s7MDaWqGKXGI+w+iNtnrpJjowhM85SZgDS1dwRiOEhelnmXZ11dKFEEIUDUW2hqhGjRoEBwezYcMGc1pKSgq7du0iPDwcgPDwcJKSkti9e7c5z8aNG8nIyKBNmzbmPFu2bOH27dvmPLGxsdSrV48KFSoU0t0ISxISrA+GHuAn9tOE3sSQRhkm8g6d2VDkgyGTrEtu2BIMdexo16IIIYSwwKkB0bV
r19i7dy979+4FtI7Ue/fu5eTJkxgMBqKjo3nzzTdZtWoV+/fvZ/DgwVSpUoVevXoB0KBBA7p27cozzzzDr7/+yvbt2xk1ahT9+vWjSpUqAAwYMAB3d3eGDh3KwYMH+frrr5k9ezZjx4510l0LgHLlrFteoiw3mcVofiKSKpzjMPW5h194l4lkUPJ7Gvv7S0AkhBCFohBGveUoLi5OAdm2IUOGKKW0ofevvvqqCgoKUh4eHqpz587qyJEjunNcunRJ9e/fX3l7eysfHx/15JNPqqtXr+ry7Nu3T7Vr1055eHioqlWrqhkzZthUThl2bz/p6dYPOW/KXrWfRuaEOYxU5bju9CH0hbmtWOHsd0wIIYovW76/DUop5cR4rFhISUnB19eX5ORkfHx8nF2cYmv5cnjssbzzGchgDB/yNi/hQRoJBPEU/2Et3R1fSCeIjtaezZkzd9JCQmD2bJloUQghCsKW7+8i26lalBxGIzz8MKxZk3feEE7xOUO4nzgAVvIQT7OQiwQ4uJTO8/DD8N57sHWrNl9R5coy95AQQhQ2CYiEQy1ZAo8/bl3eR/mGz3iWCiRxHU+imcVCnqYoLL3hCAaDVhNkCn6kr5AQQjiPBETCYWrVguPH887nQzJzeJ7BfAHAr9zF43zJUeo6uITOIyvTCyFE0VJkh92L4q1iReuCoXvZxj6aMZgvMOLC67zKvWwv0cEQyMr0QghR1EgNkbCrtDTrZpx24zZTmcokZuBKBsepwSC+YAf3Or6QTvLKK9CwofQREkKIokgCImE3Y8ZoTUB5qcsRvuRx7uJ3ABbxBKOZzVVK9gi+zp2ln5AQQhRVEhCJArt5Ezw9rcmpGMZ8PmAsXtzgMhUYxnxW0MfRRSyQV1+F+++HlSutC/iyytx5WgghRNEkAZEokAcegJ9/zjtfAOf5P4byIKsB+JnODOFzzlLVwSUsGH9/mDLlziiwtm2hb19t2kRrSOdpIYQoHqRTtcg3g8G6YKg7a9hPEx5kNam4M4YP6MJPTg2GXFzghRdg2jTtPnJaU+3TT/WBzKOPagGStaTztBBCFA9SQyRsZjSCmxWfnHLc4D3GM4J5AOynMQNYygGaOLiEefv6a+jzv5a6xo1h9Gg4fTp7vjFjtOApc0DzyiswZw5cupTz+f39tWt07Cg1Q0IIURxIDZGwybJl1gVDLdnNH7Q0B0MfMIa7+K1IBEMVK8Ijj9x5HRUFH3xgOe/p09C7N8TE3ElzdYX58y3XKplqm+bP1zpRSzAkhBDFgwREwipGIwQGQv/+uedzwcgkpvML91CfI5yhChHEMo4PSKVs4RQ2D5cvw1tv3XltNMLw4bkfM2yYli+zihWz5ytfXqttqlgxe34hhBBFlwREIk8LFmi1Qhcu5J4vjBPE0YnpvEQZ0vmW3jTlTzYQUTgFtcGUKXdqfTZtyr35C7T9mzZp/46J0ZrbLB2TkqJ1oO7UCapX19csCSGEKLokIBK5Mhi02pHcKQbyJftoxn1s5SrePMEiHmU5l/EvjGLmS3S0VotjCnTysmmTln/0aOtGmZ05owVOEhQJIUTRJwGRyFFOI68y8+MKSxnAlwzClxS205Zm7ONznqCoL8p66pS2wrwttm613PnaElPQZAq8hBBCFF0SEIlsEhKsC4Y6Esc+mtGfZaTjyqu8Tgc2E09NxxfSTs6ds3726I4dtfy2UCp/gZcQQojCJQGR0HF11dbayo07qbzDRDbQmWqc4ii1acsO3uRVjMVsJofKlbVAxz+Plj1/fy1fXs8mJ7YGUkIIIQqXBETCzGCAjIzc8zTgEL9wDxN5FxcU83mGFuzhN+4unELaicEAoaF3FlmdPz/3/PPna/nat9cmW7SmBi2z/AZSQgghCocERIKbN635gleMYg67aUUL9nKBSjzM9zzLfK7jXRjFtBtLy2lERcGKFVA1y+TZISFaumliRldXmD1bf568rmUKvIQQQhRdEhCVcqGheS/MGsw51tKNObx
AOW6xlq40YT+reLhwClkATz6pBTWZ5bScRlQU/PsvxMXB0qXazxMnLOf79tvswVNWso6ZEEIUHwalrF2msvRKSUnB19eX5ORkfHx8nF0cu7GmhuNhvmchT1OJS9ykLBN4l08YSVEfQQZav5/ERO3fmzbdGV7fsWP2JTWMRq3j87lzWvOWqSktN5mPOXpUm68p8wi00FAtGJJ1zIQQwjls+f4uXj1ghV3cvJl3rZAX1/iQMTzDQgD20JyBLOEwDQuhhPbxwgtaUBMTo1+r7M03tVqi2bO1YCXrftDvz4mrq36E2ssv2x5UCSGEKBqkhsgKJamGqE4d+Oef3PPczS6WMJDaHCMDA+8ygVd5g9u4F04h7cBUO7RypTY5YtZPual2bPx4ePfdnM+Tuf+QEEKI4sWW728JiKxQUgKivJrIXEnnJd7mNV7HDSMnCWUw/2UzHQulfPlhMFieNXrFCnj4YW35jJwmUjQ9j9z+B5gCK0s1PflpZhNCCFF4bPn+lk7VpcCZM3kHQzU5xlba8zpTcMPIUvrTlD+LZDDk7a0FPJZGhYWG3qnVyWtWaaXyXoIj8xpmmcXEaMFWp04wYICsXSaEEMWd9CEq4Vxc8vrSVzzBYj7iBcpzjWR8GM48vmJAYRXRZmXLarU/rq7az5xqaew1GeKmTdC5853XpsVdsz5X09pllkawCSGEKNokICrB8qoVqsgl5jOM3mjVGpu5j8H8l5OEFULp8u/iRS0IMo0Uy2npDUdMhpjb4q5Kac88OvpOwCaEEKJ4kCazEig5Oe9gKIJY9tOE3sSQRhleZAb3s7HIB0Mm1tT+5HdWaUvnMbGmGU7WLhNCiOJHAqISpnx58PPLeb8Ht/iAMcTShSqc4zD1uYdfmMmLZODcKo3wcOvzWlP7Y+us0rmdx8TaZjhZu0wIIYoXCYhKEIMBrl3LeX8T/uR3WjOGWQB8wghasZs9tCycAuYhMlLr85SXkBDrl8LIaVbpgADry3X+/J1/W9sMJ2uXCSFE8SIBUQmQkJB7DYiBDMbwAb9xF405SCKBdGcNo/iEm+QxQ2Mhmjo178VlQav1saV/TlSUtgRH5iU5Tp8Ga2dQyBzc5NUMJ2uXCSFE8SQBUTFnMOReG1GV08TyAB8wDg/SWMWDNGE/a+leeIW0k4oV7TdRoqsrLFyYd76swU1uzXCydpkQQhRfEhAVY3n1i+nDcv6kKZ3ZyHU8GcZnPMxKLhBYOAW0s2++yV8wlNOcQa6uMGFCzscZDJaDm5ya4XJaNFYIIUTRJwFRMZRXE1l5UljMEJbzGBW5wq/cRQv2sIBhFLVFWW3p7Jy5L4+1THMGZR0ZZpoz6J57YPny7H2KQkNzD24sNcPFx0swJIQQxZUs3WGForR0R5kykJ6e8/572cYXDKIGJzDiwtu8xOu8RjplCq+QNggNhaefhilT8s4bF5fznEOWGI15L90REqIFMiDLcAghREkjq92XQEYjuOXybpXjBlOZyjjex5UM4qnO43zJDu4tvELaoFcvbYJDU/+cBQu0WhtL4bkpcLG1o7ItcwZ17GhbsCWEEKJkkSazYuCLL3IPhnqwmht4MZF3cSWDxQyhGfuKbDAEMGLEnZmmHdVRWeYMEkIIYS0JiIo4Pz8YPDinvYrfaM1qHjSnDOMznmQxV3Fu015ess435IiOyjJnkBBCCGtJk1kRlluH44Yc5CCNdWlt+IVfaePgUtmHpQ7SUVG5L9ZqK9OcQfZuihNCCFHySEBUBN28CZ65zJc4h1GM4hPz61OEUIN4jMXo7cypVia3xVptZWqK69NHC34yB0UyZ5AQQojMpMmsiHnwwZyDoYpcQmHQBUOP8wXVOFWsgqHCnMlZ5gwSQghhjeLzLVoK1Kx5Zwh4VkNZyEKe0aX5cYVk/BxfMDsr7FoZezfFCSGEKHkkICoCdu+G1q0t73PjNucJpAJJ5rSZTOBFZhZO4ezI3x/mz3dOrYw9m+KEEEKUPBIQOVluHaf
bsZWt3KdLq8sRjlLXwaWyr7JlYfJkePllqZURQghRNElA5ES5BUOr6UEPfjS/3kQHOhFHUVt6IzcGAzz2GCxZIoGQEEKIok0CIifJKRgK4wQnqKFLe4Cf+JkHCqFUtitXThsVZ+LjA23bQmSkNvmiu7t9rmM0Oq4PkCPPXVzIMygceT1na9+H0v5+lfb7Fw6iSpGPP/5YhYWFKQ8PD3X33XerXbt2WXVccnKyAlRycrJdyqENAM++vcZUXUIqZZQ7t3LMX1S2gACloqOViotTKj3dLo9IZ8UKpUJC9NcMCdHSi/K5iwt5BoUjr+ds7ftQ2t+v0n7/wja2fH+XmoBo2bJlyt3dXf3nP/9RBw8eVM8884zy8/NTiYmJeR5rz4Dor7+yBxTepGRLHMVHTg90rN0MBm1zxC+kFSu0czvimo48d3Ehz6Bw5PWcJ0yw7n0o7e9Xab9/YTtbvr9LzWr3bdq04a677uLjjz8GICMjg9DQUJ5//nkmTZqU67H2XO3e1RUyMu68jmIFK+ijyxNEAucJKtB1ClvmlePt2ZRl7Wr1tl7TkecuLuQZFI68njNoz9dotLzP9D788w/UqlV63y/5vIr8sOX7u1RMzJiWlsbu3buJiIgwp7m4uBAREcHOnTuz5U9NTSUlJUW32UvmYMiN27pgaDFDMKCKXTAE2t9pppXj7cWW1eqL0rmLC3kGhSOv5ww5B0Nw532YO7d0v1/yeRWOVioCoosXL2I0GgkK0gcaQUFBJCQkZMs/ffp0fH19zVtoaKhDypWOG1/RD4AW/MGTLHbIdQqTPVeOd+Rq9Y48d3Ehz6Bw2Ov5HTtWuNcrauTzKhytVAREtpo8eTLJycnm7dSpU3Y7dw3dADIDA/gKA4q9tLDbNZzJnivHO3K1ekeeu7iQZ1A47PX8atUq3OsVNfJ5FY5WKgKiSpUq4erqSmJioi49MTGR4ODgbPk9PDzw8fHRbfby++92O1WRYjDYf40y02r1OU1RUJBrOvLcxYU8g8KR13MGrc9LXu/DiBGl+/2Sz6twtFIRELm7u9OqVSs2bNhgTsvIyGDDhg2Eh4cXalkqVoSg4tdFCIBx47RfOll/ITlq5XjTavWZr2Gvazry3MWFPIPCkddzNhhg7Nic94P2Pri7l+73Sz6vwuEcPuatiFi2bJny8PBQixcvVocOHVLDhg1Tfn5+KiEhIc9j7T0PkVJKBQXlPpQ9NdXyfBuZNze3nIfB23NYfWho7nOlZN7vCI68pjPup6iRZ1A48nrO1r4Ppf39Ku33L2wjw+5z8PHHH/Puu++SkJBA8+bN+eijj2jTpk2ex9lz2H1mly/DXXfB8ePaaxcXOHQI6tW7kyfzjKyBgVra+fN3Zmc1GmHOHNi2Dby9YdAgbRHTHTvuHJOWBkuXwrVrcO+9UL8+fPUV/PuvNox1yBDo0EF/TNbr5Gc2XXuSmaodS55B4ZCZqu2jtN+/sJ4t39+lKiDKL0cFREIIIYRwHJmHSAghhBDCBhIQCSGEEKLUk4BICCGEEKWeBERCCCGEKPUkIBJCCCFEqScBkRBCCCFKPQmIhBBCCFHqSUAkhBBCiFJPAiIhhBBClHpuzi5AcWCazDslJcXJJRFCCCGEtUzf29YsyiEBkRWuXr0KQGhoqJNLIoQQQghbXb16FV9f31zzyFpmVsjIyODs2bOUL18eg8Fgt/OmpKQQGhrKqVOnZI00K8jzso08L9vI87KNPC/byPOyjb2el1KKq1evUqVKFVxccu8lJDVEVnBxcSEkJMRh5/fx8ZH/IDaQ52UbeV62kedlG3letpHnZRt7PK+8aoZMpFO1EEIIIUo9CYiEEEIIUepJQOREHh4eTJkyBQ8PD2cXpViQ52UbeV62kedlG3letpHnZRtnPC/pVC2EEEKIUk9qiIQQQghR6klAJIQQQohSTwIiIYQQQpR6EhAJIYQQotSTgMhJPvnkE6pXr07ZsmVp06YNv/76q7OLVCi2bNnCgw8
+SJUqVTAYDHz//fe6/UopXnvtNSpXrky5cuWIiIjg6NGjujyXL19m4MCB+Pj44Ofnx9ChQ7l27Zouz59//kn79u0pW7YsoaGhzJw509G3ZnfTp0/nrrvuonz58gQGBtKrVy+OHDmiy3Pr1i1GjhyJv78/3t7e9O7dm8TERF2ekydP0qNHDzw9PQkMDGTChAmkp6fr8mzatImWLVvi4eFB7dq1Wbx4saNvz+7mzZtH06ZNzRO5hYeHs3btWvN+eVa5mzFjBgaDgejoaHOaPLM7pk6disFg0G3169c375dnld2ZM2d4/PHH8ff3p1y5cjRp0oTff//dvL/I/b5XotAtW7ZMubu7q//85z/q4MGD6plnnlF+fn4qMTHR2UVzuB9//FG9/PLLKiYmRgHqu+++0+2fMWOG8vX1Vd9//73at2+feuihh1SNGjXUzZs3zXm6du2qmjVrpn755Re1detWVbt2bdW/f3/z/uTkZBUUFKQGDhyoDhw4oL766itVrlw59dlnnxXWbdpFZGSkWrRokTpw4IDau3ev6t69u6pWrZq6du2aOc9zzz2nQkND1YYNG9Tvv/+u7rnnHtW2bVvz/vT0dNW4cWMVERGh9uzZo3788UdVqVIlNXnyZHOe48ePK09PTzV27Fh16NAhNWfOHOXq6qrWrVtXqPdbUKtWrVJr1qxRf//9tzpy5Ih66aWXVJkyZdSBAweUUvKscvPrr7+q6tWrq6ZNm6rRo0eb0+WZ3TFlyhTVqFEjde7cOfN24cIF8355VnqXL19WYWFh6oknnlC7du1Sx48fV+vXr1f//POPOU9R+30vAZET3H333WrkyJHm10ajUVWpUkVNnz7diaUqfFkDooyMDBUcHKzeffddc1pSUpLy8PBQX331lVJKqUOHDilA/fbbb+Y8a9euVQaDQZ05c0YppdTcuXNVhQoVVGpqqjnPiy++qOrVq+fgO3Ks8+fPK0Bt3rxZKaU9mzJlyqjly5eb8xw+fFgBaufOnUopLQB1cXFRCQkJ5jzz5s1TPj4+5uczceJE1ahRI921+vbtqyIjIx19Sw5XoUIFtXDhQnlWubh69aqqU6eOio2NVR06dDAHRPLM9KZMmaKaNWtmcZ88q+xefPFF1a5duxz3F8Xf99JkVsjS0tLYvXs3ERER5jQXFxciIiLYuXOnE0vmfPHx8SQkJOieja+vL23atDE/m507d+Ln50fr1q3NeSIiInBxcWHXrl3mPPfddx/u7u7mPJGRkRw5coQrV64U0t3YX3JyMgAVK1YEYPfu3dy+fVv3vOrXr0+1atV0z6tJkyYEBQWZ80RGRpKSksLBgwfNeTKfw5SnOH8ejUYjy5Yt4/r164SHh8uzysXIkSPp0aNHtvuSZ5bd0aNHqVKlCjVr1mTgwIGcPHkSkGdlyapVq2jdujWPPvoogYGBtGjRggULFpj3F8Xf9xIQFbKLFy9iNBp1/ykAgoKCSEhIcFKpigbT/ef2bBISEggMDNTtd3Nzo2LFiro8ls6R+RrFTUZGBtHR0dx77700btwY0O7F3d0dPz8/Xd6szyuvZ5FTnpSUFG7evOmI23GY/fv34+3tjYeHB8899xzfffcdDRs2lGeVg2XLlvHHH38wffr0bPvkmem1adOGxYsXs27dOubNm0d8fDzt27fn6tWr8qwsOH78OPPmzaNOnTqsX7+e4cOH88ILL/D5558DRfP3vax2L0QxMHLkSA4cOMC2bducXZQirV69euzdu5fk5GS+/fZbhgwZwubNm51drCLp1KlTjB49mtjYWMqWLevs4hR53bp1M/+7adOmtGnThrCwML755hvKlSvnxJIVTRkZGbRu3Zq3334bgBYtWnDgwAE+/fRThgwZ4uTSWSY1RIWsUqVKuLq6Zht9kJiYSHBwsJNKVTSY7j+3ZxMcHMz58+d1+9PT07l8+bIuj6VzZL5GcTJq1ChWr15NXFwcISEh5vTg4GDS0tJISkrS5c/6vPJ6Fjn
l8fHxKXa/6N3d3alduzatWrVi+vTpNGvWjNmzZ8uzsmD37t2cP3+eli1b4ubmhpubG5s3b+ajjz7Czc2NoKAgeWa58PPzo27duvzzzz/y+bKgcuXKNGzYUJfWoEEDczNjUfx9LwFRIXN3d6dVq1Zs2LDBnJaRkcGGDRsIDw93Ysmcr0aNGgQHB+ueTUpKCrt27TI/m/DwcJKSkti9e7c5z8aNG8nIyKBNmzbmPFu2bOH27dvmPLGxsdSrV48KFSoU0t0UnFKKUaNG8d1337Fx40Zq1Kih29+qVSvKlCmje15Hjhzh5MmTuue1f/9+3S+V2NhYfHx8zL+swsPDdecw5SkJn8eMjAxSU1PlWVnQuXNn9u/fz969e81b69atGThwoPnf8sxydu3aNY4dO0blypXl82XBvffem22akL///puwsDCgiP6+t7kbtiiwZcuWKQ8PD7V48WJ16NAhNWzYMOXn56cbfVBSXb16Ve3Zs0ft2bNHAeqDDz5Qe/bsUf/++69SShuG6efnp1auXKn+/PNP9fDDD1schtmiRQu1a9cutW3bNlWnTh3dMMykpCQVFBSkBg0apA4cOKCWLVumPD09i92w++HDhytfX1+1adMm3VDfGzdumPM899xzqlq1amrjxo3q999/V+Hh4So8PNy83zTUt0uXLmrv3r1q3bp1KiAgwOJQ3wkTJqjDhw+rTz75pFgO9Z00aZLavHmzio+PV3/++aeaNGmSMhgM6qefflJKybOyRuZRZkrJM8ts3LhxatOmTSo+Pl5t375dRUREqEqVKqnz588rpeRZZfXrr78qNzc39dZbb6mjR4+qJUuWKE9PT/Xll1+a8xS13/cSEDnJnDlzVLVq1ZS7u7u6++671S+//OLsIhWKuLg4BWTbhgwZopTShmK++uqrKigoSHl4eKjOnTurI0eO6M5x6dIl1b9/f+Xt7a18fHzUk08+qa5evarLs2/fPtWuXTvl4eGhqlatqmbMmFFYt2g3lp4ToBYtWmTOc/PmTTVixAhVoUIF5enpqR555BF17tw53XlOnDihunXrpsqVK6cqVaqkxo0bp27fvq3LExcXp5o3b67c3d1VzZo1ddcoLp566ikVFham3N3dVUBAgOrcubM5GFJKnpU1sgZE8szu6Nu3r6pcubJyd3dXVatWVX379tXNqSPPKrsffvhBNW7cWHl4eKj69eur+fPn6/YXtd/3BqWUsq1OSQghhBCiZJE+REIIIYQo9SQgEkIIIUSpJwGREEIIIUo9CYiEEEIIUepJQCSEEEKIUk8CIiGEEEKUehIQCSGEEKLUk4BICCGEEKWeBERCCKfo2LEj0dHRDjt/9erVmTVrlsPObw1H36MQwn7cnF0AIYRwhN9++w0vLy+nliEmJoYyZcqYX1evXp3o6GgJkoQogiQgEkKUSAEBAU67dlpaGu7u7lSsWNFpZRBC2EaazIQQRUJqairjx4+natWqeHl50aZNGzZt2mTe/++///Lggw9SoUIFvLy8aNSoET/++GOO58vaZGYwGFi4cCGPPPIInp6e1KlTh1WrVpn3X7lyhYEDBxIQEEC5cuWoU6cOixYtMu8/ffo0/fv3p2LFinh5edG6dWt27doFwNSpU2nevDkLFy6kRo0alC1bFtA3mXXs2JF///2XMWPGYDAYMBgM+bovIYRjSA2REKJIGDVqFIcOHWLZsmVUqVKF7777jq5du7J//37q1KnDyJEjSUtLY8uWLXh5eXHo0CG8vb1tusa0adOYOXMm7777LnPmzGHgwIH8+++/VKxYkVdffZVDhw6xdu1aKlWqxD///MPNmzcBuHbtGh06dKBq1aqsWrWK4OBg/vjjDzIyMszn/ueff1ixYgUxMTG4urpmu3ZMTAzNmjVj2LBhPPPMM+Z0e9yXEKLgJCASQjjdyZMnWbRoESdPnqRKlSoAjB8/nnXr1rFo0SLefvttTp48Se/evWnSpAkANWvWtPk6TzzxBP379wf
g7bff5qOPPuLXX3+la9eunDx5khYtWtC6dWtAq2EyWbp0KRcuXOC3334zN4PVrl1bd+60tDT++9//5thUV7FiRVxdXSlfvjzBwcG6ey/ofQkhCk4CIiGE0+3fvx+j0UjdunV16ampqfj7+wPwwgsvMHz4cH766SciIiLo3bs3TZs2tek6mfN7eXnh4+PD+fPnARg+fDi9e/fmjz/+oEuXLvTq1Yu2bdsCsHfvXlq0aJFrn6CwsLB89Vuyx30JIQpO+hAJIZzu2rVruLq6snv3bvbu3WveDh8+zOzZswF4+umnOX78OIMGDWL//v20bt2aOXPm2HSdzCO+QOtXZGr26tatm7mPz9mzZ+ncuTPjx48HoFy5cnmeO78j2uxxX0KIgpOASAjhdC1atMBoNHL+/Hlq166t2zI3L4WGhvLcc88RExPDuHHjWLBggV3LERAQwJAhQ/jyyy+ZNWsW8+fPB7Sapb1793L58uUCnd/d3R2j0Zgt3dH3JYTImwREQginq1u3LgMHDmTw4MHExMQQHx/Pr7/+yvTp01mzZg0A0dHRrF+/nvj4eP744w/i4uJo0KCB3crw2muvsXLlSv755x8OHjzI6tWrzefv378/wcHB9OrVi+3bt3P8+HFWrFjBzp07bbpG9erV2bJlC2fOnOHixYuFcl9CCOtIQCSEKBIWLVrE4MGDGTduHPXq1aNXr1789ttvVKtWDQCj0cjIkSNp0KABXbt2pW7dusydO9du13d3d2fy5Mk0bdqU++67D1dXV5YtW2be99NPPxEYGEj37t1p0qQJM2bMsDiaLDevv/46J06coFatWub+Ro6+LyGEdQxKKeXsQgghhBBCOJPUEAkhhBCi1JOASAghhBClngREQgghhCj1JCASQgghRKknAZEQQgghSj0JiIQQQghR6klAJIQQQohSTwIiIYQQQpR6EhAJIYQQotSTgEgIIYQQpZ4EREIIIYQo9f4fhG2aSffhw8cAAAAASUVORK5CYII=",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "#interpretation\n",
+    "fig,ax = plt.subplots()\n",
+    "ax.scatter(x_test,y_test,color='blue')\n",
+    "ax.plot(x_test,predict,color='red')\n",
+    "plt.title(\"Prédiction de la Présence (Attendency) (R2 = 0.98)\")\n",
+    "plt.xlabel(\"les inscrits\")\n",
+    "plt.ylabel(\"les presents\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "b962144b-ab7e-42e3-ba8e-2b31953d64ca",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "avec MAE : 28.234713006519993\n",
+      "avec R2 : 0.9824798790918774\n"
+     ]
+    }
+   ],
+   "source": [
+    "#mesure performance\n",
+    "print ('avec MAE :', mean_absolute_error(y_test,predict))\n",
+    "print ('avec R2 :', r2_score(y_test,predict))\n",
+    "# score R2 moyen, améliorons ça avec les dates"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f3a0ef8-9b7b-4cc1-a332-18bf1b8d136c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10 (ml-env)",
+   "language": "python",
+   "name": "ml-env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}