# Page header captured with the file (not Python code):
# harshidagli's picture
# Create app.py
# ef3afc6 verified
# raw
# history blame
# 1.75 kB
import pandas as pd
import altair as alt
import ipywidgets as widgets
from IPython.display import display, clear_output
# Lift Altair's default row cap so the full dataset can be charted.
alt.data_transformers.disable_max_rows()

# Despite the name, `url` holds a path to a CSV file read by pandas.
url = "contract_awards_in_investment_project_financing_22-11-2024.csv"
df = pd.read_csv(url)

# Only 64-bit float/int columns are offered for correlation analysis.
numeric_columns = df.select_dtypes(
    include=["float64", "int64"],
).columns
# Materialise the column labels once as a plain list so both dropdowns share
# the same simple sequence of choices.
_column_choices = list(numeric_columns)
if not _column_choices:
    # Fail with an explicit message instead of an opaque index error below.
    raise ValueError(
        "No numeric columns found in the dataset; nothing to correlate."
    )

# First column selector; defaults to the first numeric column.
col1_dropdown = widgets.Dropdown(
    options=_column_choices,
    value=_column_choices[0],
    description="Column 1:",
)

# Second column selector; defaults to the second numeric column, falling back
# to the only available column when the dataset has just one.
col2_dropdown = widgets.Dropdown(
    options=_column_choices,
    value=_column_choices[1] if len(_column_choices) > 1 else _column_choices[0],
    description="Column 2:",
)

# Output area that receives the printed correlation and the chart.
output = widgets.Output()
def _escape_field(name):
    """Backslash-escape characters Vega-Lite reads as nested-field syntax."""
    for special in (".", "[", "]"):
        name = name.replace(special, "\\" + special)
    return name


def update_correlation(change=None):
    """Recompute the correlation and redraw the scatter plot.

    Reads the two currently selected columns from ``col1_dropdown`` and
    ``col2_dropdown``, prints their correlation (as returned by
    ``Series.corr``) and displays an Altair scatter plot, all inside the
    ``output`` widget, replacing whatever was shown there before.

    Args:
        change: The change notification supplied by the widget observer.
            It is not inspected, so the function may also be called
            directly with no argument to force a redraw.
    """
    with output:
        clear_output(wait=True)

        # Get the selected columns
        col1 = col1_dropdown.value
        col2 = col2_dropdown.value

        # Calculate correlation
        correlation_value = df[col1].corr(df[col2])
        print(f"Correlation between {col1} and {col2}: {correlation_value:.2f}")

        # Only ship the columns the chart actually uses: the row limit is
        # disabled, so embedding every column would bloat the chart spec.
        # dict.fromkeys de-duplicates (e.g. col1 == col2) while keeping order.
        needed = list(
            dict.fromkeys(["Region", "Project Name", "Supplier", col1, col2])
        )
        plot_df = df[needed]

        # Use the explicit field/type form for user-selected columns so that
        # names containing ':' are not misread as "field:type" shorthand, and
        # escape '.', '[' and ']' so they are not treated as nested access.
        field1 = _escape_field(col1)
        field2 = _escape_field(col2)

        # Create a scatter plot
        correlation_plot = (
            alt.Chart(plot_df)
            .mark_circle(size=60)
            .encode(
                x=alt.X(field=field1, type="quantitative", title=col1),
                y=alt.Y(field=field2, type="quantitative", title=col2),
                color="Region:N",
                tooltip=[
                    "Project Name",
                    "Supplier",
                    alt.Tooltip(field=field1, type="quantitative", title=col1),
                    alt.Tooltip(field=field2, type="quantitative", title=col2),
                ],
            )
            .properties(
                title=f"Correlation Plot: {col1} vs. {col2}",
                width=600,
                height=400,
            )
        )
        display(correlation_plot)
# Redraw whenever either dropdown's selected value changes.
col1_dropdown.observe(update_correlation, names="value")
col2_dropdown.observe(update_correlation, names="value")

print("Select columns for correlation analysis:")
display(col1_dropdown, col2_dropdown, output)

# Observers only fire when a value changes, so render the default column pair
# once up front; otherwise the output area stays empty until the user edits a
# dropdown.
update_correlation(None)