trohith89 committed on
Commit
bd9384c
·
verified ·
1 Parent(s): 790d49a

Update pages/3_EDA_and_Feature_Engineering.py

Browse files
pages/3_EDA_and_Feature_Engineering.py CHANGED
@@ -400,6 +400,8 @@ if 'df' in st.session_state:
400
  st.write("### HEATMAP | CORRELATION MATRIX")
401
  st.write("#### Label Encoding")
402
  import pandas as pd
 
 
403
  from sklearn.preprocessing import LabelEncoder
404
  import streamlit as st
405
 
@@ -423,13 +425,16 @@ if 'df' in st.session_state:
423
 
424
  # Display the mapping in Streamlit
425
  st.write(f"Label Encoding Mapping for Category: {category_mapping}")
426
-
 
 
 
427
  # Calculate correlation matrix
428
- corr = df.corr()
429
 
430
  # Create the heatmap plot
431
  fig, ax = plt.subplots(figsize=(20, 10))
432
- sns.heatmap(corr, annot=True, ax=ax)
433
 
434
  # Add title
435
  ax.set_title('Correlation Matrix')
@@ -437,13 +442,15 @@ if 'df' in st.session_state:
437
  # Adjust layout and render plot in Streamlit
438
  plt.tight_layout()
439
  st.pyplot(fig)
440
-
441
- st.markdown('''**Insights :**
442
-
 
443
  Correlation is a statistical measure that indicates the strength and direction of the linear relationship between two variables. The correlation coefficient ranges from -1 to 1, with the following interpretations:
 
 
 
 
444
 
445
- - -1: Perfect negative correlation (as one variable increases, the other decreases)
446
- - 0: No correlation (the variables are independent)
447
- - 1: Perfect positive correlation (as one variable increases, the other increases)''')
448
  else:
449
  st.error("No dataset found. Please upload a dataset on the main page first.")
 
400
  st.write("### HEATMAP | CORRELATION MATRIX")
401
  st.write("#### Label Encoding")
402
  import pandas as pd
403
+ import seaborn as sns
404
+ import matplotlib.pyplot as plt
405
  from sklearn.preprocessing import LabelEncoder
406
  import streamlit as st
407
 
 
425
 
426
  # Display the mapping in Streamlit
427
  st.write(f"Label Encoding Mapping for Category: {category_mapping}")
428
+
429
+ # Calculate correlation matrix (only for numeric columns)
430
+ df_numeric = df.select_dtypes(include=['number'])
431
+
432
  # Calculate correlation matrix
433
+ corr = df_numeric.corr()
434
 
435
  # Create the heatmap plot
436
  fig, ax = plt.subplots(figsize=(20, 10))
437
+ sns.heatmap(corr, annot=True, ax=ax, cmap='coolwarm')
438
 
439
  # Add title
440
  ax.set_title('Correlation Matrix')
 
442
  # Adjust layout and render plot in Streamlit
443
  plt.tight_layout()
444
  st.pyplot(fig)
445
+
446
+ # Display insights in Streamlit
447
+ st.markdown('''**Insights:**
448
+
449
  Correlation is a statistical measure that indicates the strength and direction of the linear relationship between two variables. The correlation coefficient ranges from -1 to 1, with the following interpretations:
450
+
451
+ - -1: Perfect negative correlation (as one variable increases, the other decreases)
452
+ - 0: No correlation (the variables are independent)
453
+ - 1: Perfect positive correlation (as one variable increases, the other increases)''')
454
 
 
 
 
455
  else:
456
  st.error("No dataset found. Please upload a dataset on the main page first.")