"""Interactive correlation explorer for a contract-awards CSV.

Loads the CSV into a DataFrame, offers two dropdowns listing its numeric
columns, and shows the correlation coefficient plus an Altair scatter
plot for the selected pair. Intended to be run in a Jupyter notebook.
"""

import altair as alt
import ipywidgets as widgets
import pandas as pd
from IPython.display import clear_output, display

# Lift Altair's default row limit so the full dataset can be plotted.
alt.data_transformers.disable_max_rows()

url = "contract_awards_in_investment_project_financing_22-11-2024.csv"
df = pd.read_csv(url)

numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns

# Correlation needs a pair of columns; fail with a clear message instead of
# an IndexError from the default dropdown values below.
if len(numeric_columns) < 2:
    raise ValueError(
        f"Need at least two numeric columns for correlation analysis, "
        f"found {len(numeric_columns)} in {url!r}."
    )

# Descriptive columns used to decorate the chart. They are optional: the
# plot still renders when the CSV does not contain them.
_COLOR_COLUMN = "Region"
_TOOLTIP_COLUMNS = ("Project Name", "Supplier")

# Hand the widgets a plain list rather than a pandas Index.
_column_options = list(numeric_columns)

col1_dropdown = widgets.Dropdown(
    options=_column_options,
    value=_column_options[0],
    description="Column 1:",
)

col2_dropdown = widgets.Dropdown(
    options=_column_options,
    value=_column_options[1],
    description="Column 2:",
)

output = widgets.Output()


def update_correlation(change=None):
    """Recompute the correlation and redraw the scatter plot.

    Reads the current values of both dropdowns, prints the correlation
    between the two selected columns, and displays a scatter plot of them
    inside the ``output`` widget, replacing whatever was shown before.

    Args:
        change: The change notification passed by ``observe``. It is not
            inspected (the dropdowns are read directly), so the function
            can also be called with no argument to force a refresh.
    """
    col1 = col1_dropdown.value
    col2 = col2_dropdown.value

    with output:
        clear_output(wait=True)

        # Calculate correlation
        correlation_value = df[col1].corr(df[col2])
        print(f"Correlation between {col1} and {col2}: {correlation_value:.2f}")

        # Only reference descriptive columns that actually exist in the data.
        has_color = _COLOR_COLUMN in df.columns
        extra_tooltips = [c for c in _TOOLTIP_COLUMNS if c in df.columns]

        # dict.fromkeys de-duplicates while keeping order (col1 may equal col2).
        tooltip_fields = list(dict.fromkeys([*extra_tooltips, col1, col2]))

        # Send the chart only the columns it uses; embedding the whole
        # DataFrame on every redraw makes the chart spec needlessly large.
        needed = [col1, col2, *extra_tooltips]
        if has_color:
            needed.append(_COLOR_COLUMN)
        plot_data = df[list(dict.fromkeys(needed))]

        encodings = {
            "x": alt.X(f"{col1}:Q", title=col1),
            "y": alt.Y(f"{col2}:Q", title=col2),
            "tooltip": tooltip_fields,
        }
        if has_color:
            encodings["color"] = f"{_COLOR_COLUMN}:N"

        # Create a scatter plot
        correlation_plot = (
            alt.Chart(plot_data)
            .mark_circle(size=60)
            .encode(**encodings)
            .properties(
                title=f"Correlation Plot: {col1} vs. {col2}",
                width=600,
                height=400,
            )
        )

        display(correlation_plot)


col1_dropdown.observe(update_correlation, names='value')
col2_dropdown.observe(update_correlation, names='value')

print("Select columns for correlation analysis:")
display(col1_dropdown, col2_dropdown, output)

# Render the default selection immediately; otherwise the output area stays
# empty until the user changes one of the dropdowns.
update_correlation()