# Schema_Study_BILD5 / config.py
# Latest commit 3b3640b (keefereuther): Update syllabus to SP26 (LFS), refresh config and terms CSV
# config.py
#
# ======================================================================
# SETTINGS FOR THE SCHEMA STUDY APP
# ======================================================================
#
# This file contains all the customizable settings for your app.
# If you're not familiar with programming, don't worry! Just follow
# the instructions next to each setting and edit the values between
# the quotation marks or after the equals sign.
#
# IMPORTANT:
# - DO NOT remove any quotation marks or equals signs
# - DO NOT change any variable names (the words before the equals sign)
# - DO NOT delete any lines or sections.
# - Keep your file in the same folder as app.py
#
# ======================================================================
# ==============================================
# 1. BASIC APP SETTINGS
# ==============================================
# The title shown at the top of your app
app_title = "Schema Study"
# The file containing your terms and their definitions/context
# This must be a CSV file in the same folder as app.py
# The CSV file should have two columns: first column for terms, second column for context
default_terms_csv = "terms.csv"
# Warning message shown at the bottom of the chat window
# The text is wrapped in ** ... ** (Markdown bold markers); keep them in matching pairs when editing
warning_message = "**ChatGPT can make errors and does not replace verified and reputable online and classroom resources. Do NOT enter any private, confidential, or personally identifiable information.**"
# ==============================================
# 2. AI MODEL SETTINGS
# ==============================================
# The OpenAI model used by the app
# Options: "gpt-5.1" (default, reasoning model) or "gpt-4.1" (non-reasoning model)
# - gpt-5.1: Latest reasoning model with reasoning="none" default for faster responses
# - gpt-4.1: Non-reasoning model with temperature control
# NOTE(review): the value set below ("gpt-5.4-mini") is not one of the options
# listed above. The reasoning_effort and temperature notes further down are
# written only for "gpt-5.1" / "gpt-4.1", so it is unclear from this file which
# of those settings (if any) apply to the current model. Confirm against app.py
# and update this list of options to match.
ai_model = "gpt-5.4-mini"
# Reasoning effort for gpt-5.1 (only applies when ai_model = "gpt-5.1")
# Options: "none" (default, fastest), "minimal", "low", "medium"
# "none" disables reasoning for faster responses without reasoning overhead
reasoning_effort = "none"
# Temperature for gpt-4.1 (only applies when ai_model = "gpt-4.1")
# Controls randomness/creativity (0-2)
# Lower values (0.1-0.3) = more focused, precise responses
# Higher values (0.7-1.0) = more creative, varied responses
temperature = 0.1
# Maximum length of AI responses (measured in tokens)
# Higher values allow for longer responses (1000-2000 is usually sufficient)
# Maps to max_output_tokens in Responses API
max_tokens = 1000
# Enable web search functionality (only applies when ai_model supports web search)
# When enabled, the AI can search the web for current information and cite sources
# Note: Web search is incompatible with reasoning effort - reasoning will be disabled automatically
# Options: True (enabled) or False (disabled, default)
# This is currently set to True, i.e. NOT the default. Per the note above, that
# means reasoning is expected to be disabled regardless of reasoning_effort.
enable_web_search = True
# ==============================================
# 3. STUDENT INSTRUCTIONS
# ==============================================
# Instructions displayed to students when they click the instructions expander.
# The text uses Markdown-style markers (numbered list, *italic*, **bold**);
# presumably the app renders it as Markdown - keep the markers in matching pairs.
# The string deliberately starts and ends with a line break.
instructions = '''
The goal of this app is to help you learn and assess your knowledge of core course concepts and examples.
1. Choose a course term/phrase from the drop down menu.
2. *Pause and think for 30 seconds.* What is everything you associate with this term/phrase? What is a simple definition or example?
3. Write as little or as much as you'd like about it. Try to include anything you might need to know for an exam.
4. Please follow-up with questions. If you don't know something, just ask. It is perfectly ok to write: "I have no idea what this term means." **Have a conversation!**
'''
# ==============================================
# 4. PROMPT TEMPLATE BUTTONS
# ==============================================
# These templates appear as buttons students can click to start different types of activities
# Each template has a "name" (shown on the button) and a "template" (the text sent when clicked)
# You can edit the existing templates or add new ones following the same format
# The {term} will be replaced with the selected term from the dropdown
# The {term_list} will be replaced with the list of all terms
# NOTE(review): the substitution itself happens outside this file. Presumably
# any other curly braces added inside a template could interfere with it -
# confirm against app.py before adding literal { or } characters.
prompt_templates = [
# Asks the AI to surface misconceptions about the selected term via multiple choice questions.
{
"name": "Misconception Check",
"template": "What are some common misconceptions about {term}? Help me identify and correct them by asking me multiple choice questions."
},
# Asks the AI for two true statements and one false statement about the selected term.
{
"name": "Two Truths & a Lie",
"template": "Tell me two truths and one lie about {term}. I'll try to identify the lie and explain my reasoning."
},
# Asks the AI to model a term-to-term connection, then have the student build one.
# NOTE(review): the second instruction in this template ("...between the
# displayed term and one other you MUST CHOOSE from the course term list one
# term that is directly related to {term}") reads like two sentences merged
# together. Confirm the intended wording before editing the string.
{
"name": "Connect Terms",
"template": "I want to test my ability to connect {term} to others in the term list. First, give me an example of how to connect the terms 'bats' and 'nitrogen' in a hypothetical real-life scenario. Second, prompt me to similarly create a logical applied scenario between the displayed term and one other you MUST CHOOSE from the course term list one term that is directly related to {term}. When you first display the terms, do NOT give me any additional information about either term. Your role is to provide feedback whether the scenario I create logically and accurately links the two terms."
},
# Asks the AI to relate the selected term to the full term list as a concept map.
{
"name": "Schema Map",
"template": "What are all the direct connections between {term} and the other terms among {term_list}? Help me create a concept map for {term}."
},
# Long-form template: tutoring rules followed by the full text of a midterm
# exam, including grading rubrics for the short-answer questions.
# NOTE(review): the opening line names the exam "BILD 5 F25 Midterm Exam" but
# the embedded exam header says "Spring 25" - confirm which term is correct.
# NOTE(review): the multiple choice questions are included without an answer
# key, so the AI has to work out the correct options itself - confirm that
# this is intended.
# NOTE(review): the sentence telling the AI to ignore {term} and {term_list}
# contains those placeholders itself, so presumably they are still replaced
# with the selected term and the full term list when the button is clicked.
{
"name": "Review the midterm",
"template": '''
You are a highly skilled, patient BILD 5 tutor helping a student review their "BILD 5 F25 Midterm Exam" so they can do well on the final.
The student must choose which exam question to work on. Do not introduce new questions on your own. For each question they bring up:
- First, ask them to say what they think the answer is and why.
- Respond with a **single short Socratic question** that targets their reasoning on that one question only.
- Do **not** reveal the correct answer or a full explanation unless the student explicitly asks you to. Even then, keep explanations brief and tied to their thinking.
- When they are correct, briefly confirm and ask one deeper follow‑up; when they are incorrect or unsure, give a tiny hint and ask another focused question instead of fixing everything at once.
- Keep each turn concise, concrete, and in everyday language (no extra jargon).
Always finish your turn with both:
1) One specific Socratic question about the current exam question.
2) A simple invitation like: “Do you have any questions about this question or others? Would you like a similar practice example?”
Ignore {term} and {term_list} when providing your response; just focus on the exam questions the student chooses.
--------------------------------
# BILD 5 – Spring 25 Midterm Exam – Reuther
**25 points**
## MULTIPLE CHOICE SECTION (1 point each - choose the single most appropriate answer):
**1.** As a field biologist studying endangered coral reef ecosystems, which task would benefit LEAST from computational/programming skills?
a) Analyzing thousands of underwater photos to quantify coral bleaching rates
b) Creating a drawing of a newly discovered species
c) Processing environmental sensor data collected every minute for six months
d) Building predictive models of reef recovery under different temperature scenarios
e) Automating species identification from acoustic recordings
**2.** You collect data on frog mating calls with columns: `frog_id`, `date`, `temperature`, `humidity`, `call_frequency_hz`, `call_duration_sec`, `number_of_calls`. Is this dataset "tidy"?
a) No—environmental variables should be in a separate table
b) Yes—each row represents one observation with variables in separate columns
c) No—multiple call measurements should be combined into one column
d) Yes—but only if we reshape it to have one row per individual frog
e) Cannot determine without seeing the actual data values
**3.** A marine biologist measures shell thickness in 150 limpets and finds the data heavily right-skewed due to a few extremely thick shells. Which measure of central tendency best represents the "typical" shell thickness?
a) Mean, because it uses all data points
b) Mode, because it shows the most common value
c) Range, because it captures the full variation
d) Median, because it's resistant to extreme values
e) Standard deviation, because it quantifies spread
**4.** In studying photosynthesis rates in algae, you calculate the variance of your measurements. This statistic specifically tells you:
a) The average photosynthesis rate across all samples
b) How much individual measurements differ from the mean
c) The middle value when all rates are ordered
d) The probability of obtaining your results by chance
e) The difference between highest and lowest rates
**5.** According to the Central Limit Theorem, if you repeatedly sample groups of 30 butterflies from a population with non-normal wingspan distribution and calculate each group's mean wingspan, what pattern emerges?
a) The sample means will match the original skewed distribution
b) The sample means will form an approximately normal distribution
c) The sample means will become increasingly variable
d) The sample means will cluster around the population median
e) Nothing predictable—the pattern will be random
**6.** A researcher studying bacterial growth writes: "H₀: Treatment A and Treatment B produce different growth rates." What is wrong with this hypothesis statement?
a) It's not testable with statistics
b) The null hypothesis should be a specific statement of equality (μ₁ = μ₂), not a vague claim about differences
c) It doesn't specify which statistical test to use
d) The null hypothesis must ALWAYS be one of zero difference or no effect.
e) Nothing—this is correctly stated
**7.** You're comparing nest-building times between two bird species. Your histogram shows one species has a roughly normal distribution while the other is strongly left-skewed. To proceed with a t-test, you should:
a) Use the t-test anyway since one group is normal
b) Remove all data from the skewed group
c) Check sample sizes and consider transformations or non-parametric alternatives
d) Only analyze the normally distributed group
e) Combine both groups into one dataset
**8.** In a power analysis for an experiment on fish growth rates, increasing your sample size from n=10 to n=50 per group while keeping everything else constant will:
a) Decrease both Type I and Type II error rates
b) Increase your ability to detect a true effect if it exists
c) Make the effect size larger
d) Guarantee statistical significance
e) Reduce the need for proper experimental controls
**9.** You obtain p = 0.03 with α = 0.05 and reject the null hypothesis. Unknown to you, the null hypothesis was actually true. This represents:
a) A correct decision (true negative)
b) Type I error (false positive)
c) Type II error (false negative)
d) A correct decision (true positive)
e) Insufficient information to classify
**10.** Examining the relationship between effect size, sample size, alpha, and power, which single statement is FALSE?
a) Larger effect sizes are easier to detect with smaller samples
b) Increasing alpha increases power but also increases Type I error risk
c) Power increases as sample size increases
d) Observed effect size determines the biological importance. Large effect sizes are more biologically important.
e) You can achieve the same power with a smaller sample by increasing the minimum effect size you're willing to detect
**11.** A researcher studying leaf sizes in oak trees reports: "The mean leaf length was 12.3 cm (SE = 0.45 cm, SD = 3.2 cm, n = 50)." A colleague questions why both SE and SD are reported. Which statement best explains the distinct information each provides?
a) SE and SD are the same thing, just calculated differently
b) SD describes variability in the individual leaves; SE describes uncertainty about the estimated mean
c) SE describes variability in the individual leaves; SD describes uncertainty about the estimated mean
d) SD is always larger than SE due to calculation errors
e) Both describe the same variability but SE is preferred for larger samples
**12.** You calculate a 95% confidence interval for the difference in hormone levels between stressed and control fish: [2.3, 8.7] ng/mL. A colleague asks you to instead report a 99% confidence interval using the same data. Without recalculating, what can you predict about the 99% CI?
a) It will be narrower than [2.3, 8.7] because 99% is more confident
b) It will be wider than [2.3, 8.7] because greater confidence requires a wider range
c) It will have the same width but shifted to center on zero
d) It will be [0.23, 0.87] because confidence scales proportionally
e) Cannot predict without knowing the sample size
**13.** A pharmaceutical company tests a new fertilizer on 10,000 corn plants and finds it increases yield by 0.02% (p = 0.0001). The marketing team wants to advertise this as "significantly improves crop yield!" As the data scientist, what is your primary concern?
a) The p-value is too small to be trustworthy
b) The sample size is too large for valid statistics
c) The effect is statistically significant but practically meaningless
d) The null hypothesis was incorrectly specified
e) Corn yield cannot be measured precisely enough
**14.** A student analyzing butterfly wing patterns writes the following R code:
```r
butterfly_data <- read.csv("butterflies.csv")
mean_wingspan <- mean(butterfly_data$wingspan_mm)
median_wingspan <- median(butterfly_data$wingspan_mm)
mean_wingspan
```
What will be displayed in the R console when this code runs?
a) Both the mean and median wingspan values
b) Only the mean wingspan value
c) Only the median wingspan value
d) The entire butterfly_data dataset
e) An error message because median_wingspan was not printed
**15.** A research team studying antibiotic resistance in bacteria wants to design a manipulative experiment. They have access to a laboratory, bacterial cultures, various antibiotics, and standard growth media. Which research question would most directly lead to clear statistical hypotheses for a manipulative experiment?
a) How does antibiotic exposure affect bacterial populations in natural environments?
b) What factors influence the development of antibiotic resistance in bacteria?
c) Does exposure to sub-lethal doses of ampicillin (0.5 μg/mL for 48 hours) increase the survival rate of E. coli when subsequently treated with a lethal dose (10 μg/mL)?
d) Is there a relationship between antibiotic concentration and bacterial resistance?
e) Do bacteria develop resistance faster when exposed to antibiotics compared to bacteria that are not exposed to antibiotics?
---
## SHORT ANSWER SECTION:
**16.** A graduate student studying desert lizard metabolism has the following R code and output:
```r
# Load data
lizard_data <- read.csv("desert_lizards.csv")
# Check structure
str(lizard_data)
# 'data.frame': 120 obs. of 5 variables:
# $ species : chr "horned" "horned" "collared" ...
# $ temp_C : num 28.5 31.2 29.8 ...
# $ mass_g : num 45.2 38.9 52.1 ...
# $ metabolic_rate: num 0.82 0.91 1.05 ...
# $ activity_level: chr "low" "medium" "high" ...
# Create visualization
library(ggplot2)
ggplot(lizard_data, aes(x = temp_C, y = metabolic_rate, color = species)) +
geom_point(size = 3) +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ activity_level) +
labs(title = "Desert Lizard Metabolic Rates",
x = "Temperature (°C)",
y = "Metabolic Rate (mL O2/g/hr)")
```
a) **[2 points]** Describe in words what visualization this code produces. Be specific about what is shown.
**GRADING RUBRIC (2 points total):**
- 0.5 points: Identifies as a scatterplot
- 0.5 points: Identifies X axis as temperature (temp_C)
- 0.5 points: Identifies Y axis as metabolic rate
- 0.5 points: Identifies color as representing different species
b) **[2 points]** The student wants to test if the mean metabolic rate differs between species. What two assumption-checking steps should they take to make sure the assumption of normality is not violated?
**GRADING RUBRIC (2 points total):**
- 0.5 points: Names appropriate method (histogram, Q-Q plot, density plot, boxplot)
- 0.5 points: Names they are looking for deviations from normality (can also optionally mention outliers)
- 0.5 points: Names a statistical test like Kolmogorov-Smirnov (KS) or Shapiro-Wilk
- 0.5 points: States that a p-value < 0.05 indicates a violation of normality
**17.** A researcher tested the hypothesis: "Different fertilizer types (organic, synthetic, control) affect tomato plant height (cm)." They collected height measurements from 40 plants per fertilizer type and created the following figure:
### FIGURE DESCRIPTION:
The figure is a line graph with the following characteristics:
- **Title**: "data" (appears in the top left corner)
- **X-axis**: Labeled "type" with three categorical values: "Control", "Organic", and "Synthetic"
- **Y-axis**: Labeled "values" with a scale ranging from 40 to 55
- **Data representation**: Three data points connected by black lines:
- Control: approximately 45 units
- Organic: approximately 52 units (highest point)
- Synthetic: approximately 48 units
- **Graph style**: The three points are connected by straight black lines forming a peaked shape, with the organic fertilizer showing the highest value
- **Grid**: Light gray gridlines in the background
- **Point markers**: Black filled circles at each data point
a) **[2 points]** List TWO specific problems with this figure.
**GRADING RUBRIC (2 points total):**
- 0.75 points: States one problem
- Acceptable problems:
1. Wrong geom type (line graph for categorical data)
2. Poor/missing labels
3. No variability shown (no error bars)
4. No sample size information
- 0.25 points: States why the first problem is a bad decision
- 0.75 points: States a second problem
- 0.25 points: States why the second problem is wrong
b) **[2 points]** What type of visualization should the researcher have used instead?
**GRADING RUBRIC (2 points total):**
- 2.0 points: Names appropriate type (boxplot, violin plot, bar plot with error bars)
- 1.0 point: Partial credit for another visualization that could use categorical data but is not optimal (like a pie chart)
**18. [2 points]** Your friend is not a science major and asks you to explain something they heard about. Choose ONE of the scenarios below and write a clear explanation using concepts from BILD 5. Your explanation should help them understand the statistical concept using their specific situation.
**SCENARIO A: Confidence Interval**
Your friend is working on a psychology research project about college student sleep patterns. They surveyed 50 randomly selected UCSD students and found the average sleep duration was 6.5 hours per night, with a 95% confidence interval of [6.1, 6.9] hours. Your friend says: "So this means that 95% of UCSD students sleep between 6.1 and 6.9 hours, right?"
Use this specific scenario to help them understand what "95% confident" refers to.
**GRADING RUBRIC FOR SCENARIO A (2 points total):**
- 0.5 points: Clarifies the CI is NOT about 95% of individuals falling in the range
- 0.5 points: Explains it's about uncertainty in estimating the population mean
- 1.0 point: Explains the repeated sampling interpretation or another accurate definition (if we repeated this study 100 times, 95 intervals would capture the true mean)
- 0.5 points: Partial credit for a CI definition that is generally OK but has some aspect that is incorrect
**OR**
**SCENARIO B: p-value**
Your friend is reading a news article that says: "A new study found that people who eat chocolate daily have better memory (p = 0.04). This proves chocolate improves memory!" Your friend asks: "What does p = 0.04 mean? Does it mean there's only a 4% chance they're wrong?"
Explain what the p-value actually tells us in this study.
**GRADING RUBRIC FOR SCENARIO B (2 points total):**
- 0.5 points: Corrects the "4% chance they're wrong" misconception
- 0.5 points: Includes an explicit reference to "assuming the null hypothesis is true"
- 1.0 point: Clearly defining it as a probability you'd see a difference/test statistic at least this large due to random chance alone
- 0.75 points: Partial credit if not saying "at least this large" or not indicating area under the curve (e.g., if they said it was the probability of getting this exact test statistic due to chance alone)
---
**Total: 25 points**
Page 8/8
---
## GRADING SUMMARY
**Multiple Choice Section:** 15 questions × 1 point each = 15 points
**Short Answer Section:** 10 points total
- Question 16a: 2 points
- Question 16b: 2 points
- Question 17a: 2 points
- Question 17b: 2 points
- Question 18: 2 points
**Total Exam Points: 25 points**
'''
},
# Asks the AI to interview the student about upcoming assessments and build a study plan.
# This template uses neither {term} nor {term_list}.
{
"name": "Create a Study Plan",
"template": "Please ask me about what assessments I have coming up. You will then ask me simple questions about what I need to know to do well on the assessment and surmise my study preferences. You will then help me create a scaffolded, spaced-repetition study plan to gain the mastery needed to do well on the assessment at all levels of Bloom's taxonomy."
}
]
# ==============================================
# 5. ATTRIBUTION AND LICENSE
# ==============================================
# Information about who created the app (appears in the sidebar)
# Links are written in Markdown form: [link text](https://address)
app_creation_message = "This app, its corresponding manuscript, and all documentation was authored, edited, and tested by Keefe Reuther, [Liam O Mueller](https://biology.ucsd.edu/research/faculty/lomueller), and the members of the Reuther Lab - [https://keefereuther.com](https://keefereuther.com)"
# License and repository information (appears in the sidebar)
# Names the repository location, the license (GNU GPL-3), and a contact email
app_repo_license_message = "It can be found at [https://huggingface.co/spaces/keefereuther/Schema_Study_BILD5](https://huggingface.co/spaces/keefereuther/Schema_Study_BILD5) and is distributed under the GNU GPL-3 License. If you are interested in creating your own version of this application for use in your classroom, please email kdreuther@ucsd.edu for more information."
# ==============================================
# 6. RESOURCES LIST
# ==============================================
# Resources shown in the sidebar that students can access
# You can add/remove/edit resources
# Each resource can have:
# - "title": The name of the resource shown in the sidebar
# - "url": Link to an external website (optional)
# - "file_path": Path to a downloadable file in your app folder (optional)
# - "description": Text explaining the resource
# Every entry below has a "title", a "description", and exactly one of
# "url" or "file_path".
resources = [
# Downloadable file (uses "file_path" rather than "url").
# NOTE(review): the description below contains an instructor-facing note,
# and resources are described above as shown to students - confirm that the
# note should be visible to them.
{
"title": "Course Syllabus",
"file_path": "BILD_5_Syllabus_Reuther_SP26.pdf",
"description": "Download the course syllabus. **Instructor Note:** You must place the file itself within the same folder as the main app.py file in your GitHub repository."
},
# The remaining entries are external links (use "url").
{
"title": "OpenAI Prompt engineering guide",
"url": "https://platform.openai.com/docs/guides/prompt-engineering/six-strategies-for-getting-better-results",
"description": "A guide to help you craft effective prompts for the OpenAI chatbot. It includes best practices and examples to improve the quality of responses."
},
{
"title": "UC Berkeley Library Guide to Detecting Fake News",
"url": "https://guides.lib.berkeley.edu/fake-news",
"description": "This UC Berkeley Library guide offers comprehensive strategies and resources for identifying fake news, understanding its impact, and evaluating the credibility of various news sources, including lists of known fake news sites and tips for detecting misinformation."
},
{
"title": "Is it cheating to use ChatGPT?",
"url": "https://academicintegrity.ucsd.edu/excel-integrity/gen-ai/ai-in-education.html",
"description": "The UC San Diego Academic Integrity Office provides guidance on the appropriate use of generative AI tools in educational settings, emphasizing the importance of adhering to instructor guidelines and the potential consequences of misuse, including integrity violations and academic penalties."
},
{
"title": "OpenStax - Biology",
"url": "https://openstax.org/details/books/biology",
"description": "Provides free, peer-reviewed, openly licensed textbooks for introductory college and AP-level biology courses."
},
{
"title": "Scitable by Nature Education",
"url": "https://www.nature.com/scitable",
"description": "A free science library and personal learning tool focusing on genetics, cell biology, and related topics. It offers articles, eBooks, and educational resources from experts and is part of Nature Education."
}
]
# ==============================================
# 7. AI SYSTEM PROMPT (ADVANCED)
# ==============================================
#
# THIS SECTION CONTROLS HOW THE AI ASSISTANT BEHAVES
# Only edit this if you know what you're doing!
#
# DO NOT REMOVE/EDIT anything inside the curly braces = '{selected_term}', '{selected_context}', '{term_list}'
# These are placeholder variables that get filled in automatically by the app
def term_prompt(selected_term, selected_context, term_list):
    """Build the system prompt text for the "Pliny" tutor persona.

    The three arguments are interpolated into a fixed prompt, each wrapped in
    single quotes, using ordinary f-string formatting.

    Args:
        selected_term: The term the student is assumed to be asking about;
            inserted in four places in the prompt.
        selected_context: Course context for the selected term; inserted once,
            under "Context-Driven Support".
        term_list: The course terms; inserted twice (opening paragraph and
            the suggestion to link terms across turns).

    Returns:
        The fully formatted prompt as a single string.
    """
    # Edit the wording with care: the text inside each pair of curly braces
    # must stay exactly as written so the values above are filled in.
    system_prompt = f"""You are Pliny 😊, a friendly and knowledgeable AI biology tutor for university students. Your mission is to help students build a robust understanding of these course-relevant biology terms and concepts: '{term_list}' This includes clarifying definitions, providing examples, addressing misconceptions, exploring applications, and encouraging connections between terms. You NEVER directly answer a question without first trying to get the student to answer it themselves EXCEPT if it a term related to the course syllabus, If it is related to the syllabus or course logistics, give a complete and accurate immediate answer.
**Guidelines:**
#### **Communication Style:**
- Use clear, simple language and avoid unnecessary jargon.
- Be succinct but make sure to respond to all statements made by the user.
- Be approachable and professional.
- Provide information step-by-step to manage cognitive load.
- Use culturally inclusive examples and analogies that do not require advanced biological knowledge.
- KEEP EACH RESPONSE SHORT.
#### **Feedback and Encouragement:**
- Offer constructive feedback and gently correct errors.
- Acknowledge correct reasoning and reinforce a growth mindset by celebrating effort and progress.
- Invite further questions to foster dialogue.
#### **Expectations for Interaction:**
- Unless there is a specific reason to do otherwise, you should assume the student is asking about '{selected_term}'.
#### **Context-Driven Support:**
- Always preferentially use the following information to guide your response: '{selected_context}'. Do not provide all of this information at once; rather, use it to inform your feedback. This information provides context for how the course uses the selected term, but is not comprehensive and should not limit the student's thinking.
#### **Critical Thinking and Engagement (PACING RULES):**
- Assess and help build the student's understanding of the term '{selected_term}'.
- **Ask exactly ONE Socratic question per turn, grounded in ONE concise, concrete applied scenario.** Fold the scenario into the question so there is only one question mark in your entire message.
- **Never present multiple options or multiple questions in the same turn.** Do not offer alternatives like "Option A/Option B" or ask follow-up questions in the same message.
- If the student has not answered your previous question, do not ask a new one; briefly encourage them to attempt an answer first.
- If the student explicitly requests more options, first confirm, then provide **at most one alternative question** on the next turn (still one question total in that turn).
- When responses are incorrect or partial, give brief, targeted feedback and then pose one new question (again, a single scenario-bound question).
#### **Response Clarity and Continuity:**
- End the message with your **single** Socratic question that already embeds the applied scenario (e.g., "Near a cave where bat guano enriches soils, how would you expect nitrate levels to change across seasons, and why?").
- **Do not append additional questions after the main question.** Stop after the single question.
- If a student selects a question without attempting to answer it, ask them to try to answer it themselves first.
- Suggest links between '{selected_term}' and other terms like '{term_list}' across turns (not by adding more questions in the same turn).
#### **Constraints:**
- You are only allowed to talk about topics relevant to what a biology student would need to know to succeed in a biology course, graduate, and follow a path to a relevant career. If asked about anything else, you should say that you are not allowed to talk about that topic. Connect their irrelevant question back to '{selected_term}' in a fun way that is still professional.
- Do NOT answer multiple-choice, fill-in-the-blank, or true/false questions. These are not allowed. However you are encouraged to create your own multiple-choice, fill-in-the-blank, or true/false questions to challenge the student. When you do so, still obey the **one-question-per-turn** rule by presenting only one item.
---
### Additional Code Usage Guidelines
Assume the student is using R and the tidyverse and has little to no command line experience.
#### **Visualization**:
- All visualizations must be created using **ggplot2** from the tidyverse. Avoid any other plotting libraries.
#### **Code Style**:
- Write all examples using **tidyverse** conventions for data manipulation and ggplot2 for visualizations.
- Write all code examples using the penguins dataset from the palmerpenguins or the iris dataset from the datasets package.
- Include thorough comments in all code examples, explaining each line or block in plain language for beginners.
#### **Encouraging Understanding**:
- Do not provide direct solutions to assignment-style questions. Instead, reframe questions to demonstrate generalizable concepts and guide students to apply these concepts themselves—while still asking exactly **one** scenario-grounded Socratic question per turn.
By following these instructions, you will provide clear and relevant guidance, helping students learn effectively while maintaining the course's academic integrity.
"""
    return system_prompt