File size: 1,754 Bytes
1219693
ef3afc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
import altair as alt
import ipywidgets as widgets
from IPython.display import display, clear_output

# Lift Altair's default row limit so the whole CSV can be charted.
alt.data_transformers.disable_max_rows()

# Contract-awards CSV; the relative path is resolved against the working directory.
url = "contract_awards_in_investment_project_financing_22-11-2024.csv"
df = pd.read_csv(url)

# Only float64/int64 columns are offered as correlation candidates.
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
if numeric_columns.empty:
    # Fail with an actionable message instead of an IndexError further down.
    raise ValueError(
        f"{url!r} has no float64/int64 columns, so there is nothing to correlate."
    )

col1_dropdown = widgets.Dropdown(
    # Give the widget a plain list rather than a pandas Index.
    options=list(numeric_columns),
    value=numeric_columns[0],
    description="Column 1:"
)

col2_dropdown = widgets.Dropdown(
    options=list(numeric_columns),
    # Pre-select a different column than "Column 1" when one exists; with a
    # single numeric column fall back to it instead of raising IndexError.
    value=numeric_columns[1] if len(numeric_columns) > 1 else numeric_columns[0],
    description="Column 2:"
)

# Everything printed or displayed by the update handler is captured here.
output = widgets.Output()

def update_correlation(change):
    """Recompute and redraw the correlation view for the selected columns.

    Reads the current values of ``col1_dropdown`` and ``col2_dropdown``,
    prints their correlation and displays a scatter plot coloured by
    ``Region``. All of this is rendered inside ``output``, whose previous
    contents are cleared first.

    Args:
        change: Change notification supplied by the widget observer. It is
            not used; the selections are read from the dropdowns directly.
    """
    with output:
        # wait=True defers clearing until new content arrives.
        clear_output(wait=True)

        # Get the selected columns
        col1 = col1_dropdown.value
        col2 = col2_dropdown.value

        # Calculate correlation
        correlation_value = df[col1].corr(df[col2])
        print(f"Correlation between {col1} and {col2}: {correlation_value:.2f}")

        # dict.fromkeys de-duplicates while keeping order, so selecting the
        # same column twice yields neither a repeated tooltip entry nor
        # duplicate column labels in the frame handed to Altair.
        tooltip_fields = list(
            dict.fromkeys(["Project Name", "Supplier", col1, col2])
        )
        chart_columns = list(dict.fromkeys(["Region", *tooltip_fields]))

        # Pass only the columns the chart encodes: with the row limit
        # disabled, the full frame would otherwise be serialised on every
        # redraw.
        chart_data = df[chart_columns]

        # Create a scatter plot
        correlation_plot = (
            alt.Chart(chart_data)
            .mark_circle(size=60)
            .encode(
                x=alt.X(f"{col1}:Q", title=col1),
                y=alt.Y(f"{col2}:Q", title=col2),
                color="Region:N",
                tooltip=tooltip_fields,
            )
            .properties(
                title=f"Correlation Plot: {col1} vs. {col2}",
                width=600,
                height=400
            )
        )
        display(correlation_plot)

# Redraw whenever either dropdown's selected value changes.
col1_dropdown.observe(update_correlation, names='value')
col2_dropdown.observe(update_correlation, names='value')

print("Select columns for correlation analysis:")
display(col1_dropdown, col2_dropdown, output)

# observe() only reacts to later changes, so without this call the output
# area stays empty until the user touches a dropdown. Render once for the
# default selections; the handler does not use its argument.
update_correlation(None)