"""Streamlit page that correlates two user-selected numeric columns of a CSV.

The script loads the CSV, lets the user pick two numeric columns, prints
their correlation coefficient and draws a scatter plot of the pair.
"""

from typing import List, Optional

import streamlit as st
import pandas as pd
import altair as alt


# Path of the CSV to analyse (a plain file name, despite the variable name).
url = "contract_awards_in_investment_project_financing_22-11-2024.csv"

# Optional context columns: the chart uses each one only if the CSV has it.
COLOR_COLUMN = "Region"
TOOLTIP_COLUMNS = ("Project Name", "Supplier Name")

# Vega-Lite treats ".", "[" and "]" inside a field name as nested-property
# access, so they must be backslash-escaped to address a column literally.
_FIELD_ESCAPES = str.maketrans({".": "\\.", "[": "\\[", "]": "\\]"})


def numeric_column_names(frame: pd.DataFrame) -> List[str]:
    """Return the names of all numeric columns of *frame*, in column order.

    Uses the generic ``"number"`` selector so that every integer and float
    width is included, not only ``int64`` and ``float64``.
    """
    return list(frame.select_dtypes(include="number").columns)


def correlation_or_none(
    frame: pd.DataFrame, first: str, second: str
) -> Optional[float]:
    """Return the correlation of two columns, or ``None`` if it is undefined.

    ``Series.corr`` yields NaN when the coefficient cannot be computed (for
    example when one column is constant); that case is mapped to ``None`` so
    the caller can show a message instead of printing "nan".
    """
    value = frame[first].corr(frame[second])
    return None if pd.isna(value) else float(value)


def escape_field(name: str) -> str:
    """Escape *name* so Vega-Lite reads it as one literal column name."""
    return str(name).translate(_FIELD_ESCAPES)


def build_scatter(frame: pd.DataFrame, x_column: str, y_column: str) -> alt.Chart:
    """Build the scatter plot of *x_column* against *y_column*.

    The selected columns are passed through ``field=``/``type=`` rather than
    the ``"name:Q"`` shorthand, so a ":" in a column name is not parsed as a
    type suffix. Context columns are added only when they exist in *frame*.
    """
    # Plain-string tooltips make Altair infer the type from the data, which
    # raises if the column is missing, so keep only the columns that exist.
    tooltip = [name for name in TOOLTIP_COLUMNS if name in frame.columns]
    tooltip += [
        alt.Tooltip(field=escape_field(name), type="quantitative", title=name)
        for name in (x_column, y_column)
    ]

    encodings = {
        "x": alt.X(field=escape_field(x_column), type="quantitative", title=x_column),
        "y": alt.Y(field=escape_field(y_column), type="quantitative", title=y_column),
        "tooltip": tooltip,
    }
    if COLOR_COLUMN in frame.columns:
        # Color by region for additional context
        encodings["color"] = f"{COLOR_COLUMN}:N"

    return (
        alt.Chart(frame)
        .mark_circle(size=60)
        .encode(**encodings)
        .properties(
            title=f"Correlation Plot: {x_column} vs. {y_column}",
            width=600,
            height=400,
        )
    )


try:
    df = pd.read_csv(url)
except FileNotFoundError:
    # Show a readable message instead of a traceback; st.stop() ends this run.
    st.error(f"Data file not found: {url}")
    st.stop()


# Filter numeric columns for correlation analysis
numeric_columns = numeric_column_names(df)
st.header("Correlation Analysis")

# The second dropdown defaults to index 1, so two numeric columns are needed.
if len(numeric_columns) < 2:
    st.warning("At least two numeric columns are required for correlation analysis.")
    st.stop()


# Dropdowns for user selection of columns to correlate
col1 = st.selectbox("Select First Numeric Attribute", numeric_columns, index=0)
col2 = st.selectbox("Select Second Numeric Attribute", numeric_columns, index=1)


# Correlation calculation
correlation_value = correlation_or_none(df, col1, col2)
if correlation_value is None:
    st.write(
        f"**Correlation between {col1} and {col2} is undefined** "
        "(a column is constant or there are too few paired values)."
    )
else:
    st.write(f"**Correlation between {col1} and {col2}: {correlation_value:.2f}**")


# Create a scatter plot
correlation_plot = build_scatter(df, col1, col2)
st.altair_chart(correlation_plot, use_container_width=True)