Create correlation_plot

#1
Files changed (1) hide show
  1. correlation_plot +36 -0
correlation_plot ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit page: interactive correlation analysis of a contract-awards CSV.

The script loads the CSV, lets the user pick two numeric columns, reports
their pairwise correlation (``Series.corr``), and renders an Altair scatter
plot of the selected pair.
"""

import altair as alt
import pandas as pd
import streamlit as st

# Despite the name, this is a scheme-less file name, so ``pd.read_csv``
# resolves it as a local path relative to the working directory.
url = "contract_awards_in_investment_project_financing_22-11-2024.csv"
df = pd.read_csv(url)

# Keep every numeric dtype, not just float64/int64, so that columns parsed
# as e.g. int32 or float32 remain available for selection.
numeric_columns = df.select_dtypes(include="number").columns.tolist()
st.header("Correlation Analysis")

# The second dropdown defaults to index=1, which Streamlit rejects when
# fewer than two options exist; bail out with a readable message instead.
if len(numeric_columns) < 2:
    st.warning(
        "At least two numeric columns are required for correlation analysis."
    )
    st.stop()

# Dropdowns for user selection of the columns to correlate.
col1 = st.selectbox("Select First Numeric Attribute", numeric_columns, index=0)
col2 = st.selectbox("Select Second Numeric Attribute", numeric_columns, index=1)

# Correlation of the two selected columns (NaN if it is undefined).
correlation_value = df[col1].corr(df[col2])
st.write(f"**Correlation between {col1} and {col2}: {correlation_value:.2f}**")

# Descriptive columns are optional context: only reference the ones that
# actually exist in the loaded data so a schema change does not break the
# chart.
tooltip_fields = [
    name for name in ("Project Name", "Supplier Name") if name in df.columns
]
tooltip_fields += [col1, col2]

encodings = {
    "x": alt.X(f"{col1}:Q", title=col1),
    "y": alt.Y(f"{col2}:Q", title=col2),
    "tooltip": tooltip_fields,
}
if "Region" in df.columns:
    # Color by region for additional context.
    encodings["color"] = "Region:N"

# Scatter plot of the selected pair.
correlation_plot = (
    alt.Chart(df)
    .mark_circle(size=60)
    .encode(**encodings)
    .properties(
        title=f"Correlation Plot: {col1} vs. {col2}",
        width=600,
        height=400,
    )
)
st.altair_chart(correlation_plot, use_container_width=True)