IS445_Part2_VizForExperts / correlation_plot
harshidagli's picture
Create correlation_plot
3be72a6 verified
raw
history blame
1.18 kB
import streamlit as st
import pandas as pd
import altair as alt
# Path of the contract-awards CSV, given relative to the working directory.
url = "contract_awards_in_investment_project_financing_22-11-2024.csv"


@st.cache_data
def _load_contract_awards(path):
    """Read the CSV at *path* into a DataFrame.

    Streamlit re-executes the whole script on every widget interaction;
    caching the result keeps the file from being re-read and re-parsed on
    each rerun.
    """
    return pd.read_csv(path)


df = _load_contract_awards(url)

# Filter numeric columns for correlation analysis. "number" selects every
# numeric dtype rather than only float64/int64, so columns stored with a
# different width or signedness are still offered to the user.
numeric_columns = df.select_dtypes(include="number").columns
st.header("Correlation Analysis")

# A pairwise correlation needs two numeric attributes. With fewer than two,
# the second dropdown's default index (1) would be out of range, so report
# the problem and halt this run instead of letting the widget raise.
if len(numeric_columns) < 2:
    st.error("At least two numeric columns are required for correlation analysis.")
    st.stop()

# Dropdowns for user selection of columns to correlate
col1 = st.selectbox("Select First Numeric Attribute", numeric_columns, index=0)
col2 = st.selectbox("Select Second Numeric Attribute", numeric_columns, index=1)

# Correlation calculation
correlation_value = df[col1].corr(df[col2])

# The result is NaN when the correlation is undefined; say so explicitly
# rather than rendering the literal text "nan" as if it were a value.
if pd.isna(correlation_value):
    st.warning(
        f"Correlation between {col1} and {col2} is undefined "
        "(a column may be constant or have too few valid values)."
    )
else:
    st.write(f"**Correlation between {col1} and {col2}: {correlation_value:.2f}**")
# Build the scatter plot step by step: encoding channels first, then the
# chart itself, then its title and dimensions.
x_axis = alt.X(f"{col1}:Q", title=col1)
y_axis = alt.Y(f"{col2}:Q", title=col2)
# Fields shown when hovering over a point.
hover_fields = ["Project Name", "Supplier Name", col1, col2]

scatter_marks = alt.Chart(df).mark_circle(size=60)
encoded_scatter = scatter_marks.encode(
    x=x_axis,
    y=y_axis,
    color="Region:N",  # Color by region for additional context
    tooltip=hover_fields,
)
correlation_plot = encoded_scatter.properties(
    title=f"Correlation Plot: {col1} vs. {col2}",
    width=600,
    height=400,
)

# Render the finished chart in the Streamlit page.
st.altair_chart(correlation_plot, use_container_width=True)