"""Streamlit page: correlation between two numeric columns of a CSV dataset.

The script loads the contract-awards CSV, lets the user pick two numeric
columns, reports their Pearson correlation (the pandas default), and draws
a scatter plot of one column against the other.
"""

import altair as alt
import pandas as pd
import streamlit as st

# Relative file path handed straight to ``pd.read_csv``.
url = "contract_awards_in_investment_project_financing_22-11-2024.csv"
df = pd.read_csv(url)

# Filter numeric columns for correlation analysis.  "number" covers every
# numeric dtype (int32, float32, nullable Int64, ...), not only the two
# 64-bit ones.
numeric_columns = df.select_dtypes(include="number").columns

st.header("Correlation Analysis")

# The second dropdown defaults to index 1, which does not exist unless the
# dataset has at least two numeric columns; stop with a readable message
# instead of letting the widget raise.
if len(numeric_columns) < 2:
    st.error("At least two numeric columns are required for correlation analysis.")
    st.stop()

# Dropdowns for user selection of columns to correlate.
col1 = st.selectbox("Select First Numeric Attribute", numeric_columns, index=0)
col2 = st.selectbox("Select Second Numeric Attribute", numeric_columns, index=1)

# Correlation calculation.  The result is NaN when either column is constant
# or there are too few overlapping non-null rows, so report that explicitly
# rather than printing "nan".
correlation_value = df[col1].corr(df[col2])
if pd.isna(correlation_value):
    st.warning(
        f"Correlation between {col1} and {col2} is undefined "
        "(a column is constant or there is not enough overlapping data)."
    )
else:
    st.write(f"**Correlation between {col1} and {col2}: {correlation_value:.2f}**")

# Descriptive columns are used only when the dataset actually has them, so a
# CSV without them still renders a plain scatter plot.
tooltip_fields = [
    name for name in ("Project Name", "Supplier Name") if name in df.columns
]
tooltip_fields += [col1, col2]

encodings = {
    "x": alt.X(f"{col1}:Q", title=col1),
    "y": alt.Y(f"{col2}:Q", title=col2),
    "tooltip": tooltip_fields,
}
if "Region" in df.columns:
    # Color by region for additional context.
    encodings["color"] = "Region:N"

# Create a scatter plot.
correlation_plot = (
    alt.Chart(df)
    .mark_circle(size=60)
    .encode(**encodings)
    .properties(
        title=f"Correlation Plot: {col1} vs. {col2}",
        width=600,
        height=400,
    )
)

st.altair_chart(correlation_plot, use_container_width=True)