mulasagg committed on
Commit
d1ae8d3
·
1 Parent(s): 3313dcb
Files changed (2) hide show
  1. app.py +156 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ from io import BytesIO
5
+ import numpy as np
6
+
7
+ # Apply Matplotlib's built-in 'fivethirtyeight' style sheet globally so every plot created below uses it
8
+ plt.style.use('fivethirtyeight')
9
+
10
def configure_plot_style(fig, ax):
    """Apply the shared look to a figure/axes pair.

    Hides the top and right spines, turns on a dashed, semi-transparent
    grid, and paints both the figure patch and the axes background white.
    """
    # Drop the upper and right-hand frame lines.
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    ax.grid(True, linestyle='--', alpha=0.7)

    # White background for the figure first, then the axes.
    for surface in (fig.patch, ax):
        surface.set_facecolor('white')
17
+
18
+
19
st.title("Interactive Dataset Plotting Tool")

# Upload Dataset
uploaded_file = st.file_uploader("Upload your CSV dataset", type=["csv"])

# Everything below only runs once a CSV has been uploaded.
if uploaded_file is not None:
    fig = None  # lets the ``finally`` clause know whether a figure was created
    try:
        # Load dataset
        df = pd.read_csv(uploaded_file)
        st.write("Dataset Preview:")
        st.dataframe(df)

        # Plot type selection
        plot_types = ["Line Plot", "Bar Plot", "Scatter Plot", "Histogram", "Box Plot", "Correlation Matrix"]
        plot_type = st.selectbox("Select Plot Type:", plot_types)

        # Color scheme selection
        color_schemes = ['viridis', 'magma', 'plasma', 'inferno', 'cividis']
        color_scheme = st.selectbox("Select Color Scheme:", color_schemes)
        # Resolve the colormap once. ``plt.get_cmap`` is used because
        # ``plt.cm.get_cmap`` was deprecated in Matplotlib 3.7 and removed in 3.9.
        cmap = plt.get_cmap(color_scheme)

        # Common figure creation
        fig, ax = plt.subplots(figsize=(10, 6))
        configure_plot_style(fig, ax)

        if plot_type in ["Line Plot", "Bar Plot"]:
            x_column = st.selectbox("Select X-axis column:", df.columns)
            y_column = st.selectbox("Select Y-axis column:", df.columns)

            if not pd.api.types.is_numeric_dtype(df[y_column]):
                st.warning("Y-axis column must be numeric for this plot type.")
            else:
                if plot_type == "Line Plot":
                    ax.plot(df[x_column], df[y_column], marker='o', linewidth=2,
                            color=cmap(0.6))
                else:
                    ax.bar(df[x_column], df[y_column], color=cmap(0.6))

                ax.set_title(f"{plot_type} of {y_column} vs {x_column}", pad=20, fontsize=14)
                ax.set_xlabel(x_column, fontsize=12)
                ax.set_ylabel(y_column, fontsize=12)
                # Tilt the tick labels only when there are many distinct x values.
                ax.tick_params(axis='x', labelrotation=45 if len(df[x_column].unique()) > 10 else 0)

        elif plot_type == "Scatter Plot":
            x_column = st.selectbox("Select X-axis column:", df.columns)
            y_column = st.selectbox("Select Y-axis column:", df.columns)

            if not pd.api.types.is_numeric_dtype(df[x_column]) or not pd.api.types.is_numeric_dtype(df[y_column]):
                st.warning("Both X and Y columns must be numeric for scatter plot.")
            else:
                # Points are coloured by their row position in the dataset.
                scatter = ax.scatter(df[x_column], df[y_column],
                                     c=np.arange(len(df)), cmap=color_scheme,
                                     alpha=0.6, s=100)
                fig.colorbar(scatter, ax=ax, label='Index')
                ax.set_title(f"Scatter Plot of {y_column} vs {x_column}", pad=20, fontsize=14)
                ax.set_xlabel(x_column, fontsize=12)
                ax.set_ylabel(y_column, fontsize=12)

        elif plot_type == "Histogram":
            column = st.selectbox("Select column:", df.columns)
            num_bins = st.slider("Number of bins:", min_value=5, max_value=50, value=20)

            if not pd.api.types.is_numeric_dtype(df[column]):
                st.warning("Column must be numeric for histogram.")
            else:
                _, _, patches = ax.hist(df[column], bins=num_bins, edgecolor='white', linewidth=1)
                # Shade the bars along the colormap from left to right.
                for i, patch in enumerate(patches):
                    patch.set_facecolor(cmap(i / len(patches)))

                ax.set_title(f"Histogram of {column}", pad=20, fontsize=14)
                ax.set_xlabel(column, fontsize=12)
                ax.set_ylabel("Frequency", fontsize=12)

        elif plot_type == "Box Plot":
            x_column = st.selectbox("Select grouping column:", df.columns)
            y_column = st.selectbox("Select value column:", df.columns)

            if not pd.api.types.is_numeric_dtype(df[y_column]):
                st.warning("Value column must be numeric for box plot.")
            else:
                # Take names and values from the same groupby pass so each box
                # gets the label of the group it was built from. groupby sorts
                # keys and drops NaN keys, which ``unique()`` does not.
                group_names = []
                group_values = []
                for name, series in df.groupby(x_column)[y_column]:
                    # Missing values would leave the box statistics undefined.
                    values = series.dropna().to_numpy()
                    if values.size:
                        group_names.append(name)
                        group_values.append(values)

                if not group_values:
                    st.warning("No non-missing values available to draw a box plot.")
                else:
                    box_plot = ax.boxplot(group_values, patch_artist=True)
                    # Labels are set on the axes rather than through the
                    # ``labels=`` argument, which was renamed in Matplotlib 3.9.
                    ax.set_xticklabels([str(name) for name in group_names],
                                       rotation=45 if len(group_names) > 10 else 0)

                    # Color the boxes
                    boxes = box_plot['boxes']
                    for i, patch in enumerate(boxes):
                        patch.set_facecolor(cmap(i / len(boxes)))
                        patch.set_alpha(0.7)

                    ax.set_title(f"Box Plot of {y_column} grouped by {x_column}", pad=20, fontsize=14)
                    ax.set_xlabel(x_column, fontsize=12)
                    ax.set_ylabel(y_column, fontsize=12)

        elif plot_type == "Correlation Matrix":
            # 'number' covers every numeric dtype, not just float64/int64.
            numeric_df = df.select_dtypes(include='number')

            if numeric_df.shape[1] == 0:
                st.warning("No numeric columns found in the dataset for correlation matrix.")
            else:
                corr = numeric_df.corr()
                im = ax.imshow(corr, cmap=color_scheme)
                fig.colorbar(im, ax=ax)

                # Add correlation values
                for (row, col), value in np.ndenumerate(corr.to_numpy()):
                    ax.text(col, row, f'{value:.2f}',
                            ha='center', va='center',
                            color='white' if abs(value) > 0.5 else 'black')

                ax.set_xticks(range(len(corr.columns)))
                ax.set_yticks(range(len(corr.columns)))
                ax.set_xticklabels(corr.columns, rotation=45, ha='right')
                ax.set_yticklabels(corr.columns)
                ax.set_title("Correlation Matrix", pad=20, fontsize=14)

        # Only show the plot and the download button when something was drawn;
        # after a validation warning the axes are still empty.
        if ax.has_data():
            # Adjust layout and display plot
            fig.tight_layout()
            st.pyplot(fig)

            # Download button
            buffer = BytesIO()
            fig.savefig(buffer, format="png", dpi=300, bbox_inches='tight')
            buffer.seek(0)
            st.download_button(
                label="Download Plot as PNG",
                data=buffer,
                file_name="plot.png",
                mime="image/png"
            )

    except Exception as e:
        # Top-level UI boundary: report the failure to the user instead of crashing the app.
        st.error(f"An error occurred: {str(e)}")
        st.info("Please make sure your dataset is properly formatted and contains appropriate data types for the selected plot type.")
    finally:
        # Close the figure so figures do not accumulate in pyplot's global
        # registry across Streamlit reruns.
        if fig is not None:
            plt.close(fig)
requirements.txt ADDED
Binary file (2.56 kB). View file