Update pages/3_EDA_and_Feature_Engineering.py
Browse files
pages/3_EDA_and_Feature_Engineering.py
CHANGED
|
@@ -400,6 +400,8 @@ if 'df' in st.session_state:
|
|
| 400 |
st.write("### HEATMAP | CORRELATION MATRIX")
|
| 401 |
st.write("#### Label Encoding")
|
| 402 |
import pandas as pd
|
|
|
|
|
|
|
| 403 |
from sklearn.preprocessing import LabelEncoder
|
| 404 |
import streamlit as st
|
| 405 |
|
|
@@ -423,13 +425,16 @@ if 'df' in st.session_state:
|
|
| 423 |
|
| 424 |
# Display the mapping in Streamlit
|
| 425 |
st.write(f"Label Encoding Mapping for Category: {category_mapping}")
|
| 426 |
-
|
|
|
|
|
|
|
|
|
|
| 427 |
# Calculate correlation matrix
|
| 428 |
-
corr =
|
| 429 |
|
| 430 |
# Create the heatmap plot
|
| 431 |
fig, ax = plt.subplots(figsize=(20, 10))
|
| 432 |
-
sns.heatmap(corr, annot=True, ax=ax)
|
| 433 |
|
| 434 |
# Add title
|
| 435 |
ax.set_title('Correlation Matrix')
|
|
@@ -437,13 +442,15 @@ if 'df' in st.session_state:
|
|
| 437 |
# Adjust layout and render plot in Streamlit
|
| 438 |
plt.tight_layout()
|
| 439 |
st.pyplot(fig)
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
|
|
|
| 443 |
Correlation is a statistical measure that indicates the strength and direction of the linear relationship between two variables. The correlation coefficient ranges from -1 to 1, with the following interpretations:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
|
| 445 |
-
- -1: Perfect negative correlation (as one variable increases, the other decreases)
|
| 446 |
-
- 0: No correlation (the variables are independent)
|
| 447 |
-
- 1: Perfect positive correlation (as one variable increases, the other increases)''')
|
| 448 |
else:
|
| 449 |
st.error("No dataset found. Please upload a dataset on the main page first.")
|
|
|
|
| 400 |
st.write("### HEATMAP | CORRELATION MATRIX")
|
| 401 |
st.write("#### Label Encoding")
|
| 402 |
import pandas as pd
|
| 403 |
+
import seaborn as sns
|
| 404 |
+
import matplotlib.pyplot as plt
|
| 405 |
from sklearn.preprocessing import LabelEncoder
|
| 406 |
import streamlit as st
|
| 407 |
|
|
|
|
| 425 |
|
| 426 |
# Display the mapping in Streamlit
|
| 427 |
st.write(f"Label Encoding Mapping for Category: {category_mapping}")
|
| 428 |
+
|
| 429 |
+
# Calculate correlation matrix (only for numeric columns)
|
| 430 |
+
df_numeric = df.select_dtypes(include=['number'])
|
| 431 |
+
|
| 432 |
# Calculate correlation matrix
|
| 433 |
+
corr = df_numeric.corr()
|
| 434 |
|
| 435 |
# Create the heatmap plot
|
| 436 |
fig, ax = plt.subplots(figsize=(20, 10))
|
| 437 |
+
sns.heatmap(corr, annot=True, ax=ax, cmap='coolwarm')
|
| 438 |
|
| 439 |
# Add title
|
| 440 |
ax.set_title('Correlation Matrix')
|
|
|
|
| 442 |
# Adjust layout and render plot in Streamlit
|
| 443 |
plt.tight_layout()
|
| 444 |
st.pyplot(fig)
|
| 445 |
+
|
| 446 |
+
# Display insights in Streamlit
|
| 447 |
+
st.markdown('''**Insights:**
|
| 448 |
+
|
| 449 |
Correlation is a statistical measure that indicates the strength and direction of the linear relationship between two variables. The correlation coefficient ranges from -1 to 1, with the following interpretations:
|
| 450 |
+
|
| 451 |
+
- -1: Perfect negative correlation (as one variable increases, the other decreases)
|
| 452 |
+
- 0: No correlation (the variables are independent)
|
| 453 |
+
- 1: Perfect positive correlation (as one variable increases, the other increases)''')
|
| 454 |
|
|
|
|
|
|
|
|
|
|
| 455 |
else:
|
| 456 |
st.error("No dataset found. Please upload a dataset on the main page first.")
|