File size: 14,761 Bytes
ce5df7b
5666118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce5df7b
5666118
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import io

# Project metadata, reused in the page header and footer.
AUTHOR = "Eduardo Nacimiento García"
EMAIL = "enacimie@ull.edu.es"
LICENSE = "Apache 2.0"

# Page-level Streamlit settings, collected in one mapping and applied at once.
_page_settings = {
    "page_title": "SimpleViz",
    "page_icon": "🎨",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)

# Header: app name, attribution line, and a short usage blurb.
st.title("🎨 SimpleViz")
_byline = " | ".join(
    (
        f"**Author:** {AUTHOR}",
        f"**Email:** {EMAIL}",
        f"**License:** {LICENSE}",
    )
)
st.markdown(_byline)
st.write(
    "\nUpload a CSV or use the demo dataset to create beautiful, "
    "interactive visualizations in seconds.\n"
)

# === GENERATE DEMO DATASET ===
@st.cache_data
def create_demo_data(n: int = 500, seed: int = 42) -> pd.DataFrame:
    """Build a reproducible synthetic dataset for trying out the app.

    Args:
        n: Number of rows to generate. Each row gets a consecutive daily
            date starting at 2023-01-01.
        seed: Seed for the random generator; the same seed yields the same
            dataset.

    Returns:
        A DataFrame with the columns Age, Income, Satisfaction, City,
        Gender, Purchase and Date. ``min(15, n)`` distinct rows have their
        Income set to NaN.
    """
    # A local generator leaves NumPy's global random state untouched.
    rng = np.random.default_rng(seed)
    data = {
        # Clip the normal draw so its tail cannot yield negative ages.
        "Age": np.clip(rng.normal(35, 12, n), 18, 90).astype(int),
        "Income": rng.normal(45000, 15000, n),
        # Upper bound is exclusive: scores fall in 1..10.
        "Satisfaction": rng.integers(1, 11, n),
        "City": rng.choice(["Madrid", "Barcelona", "Valencia", "Seville"], n),
        "Gender": rng.choice(["M", "F"], n, p=[0.6, 0.4]),
        "Purchase": rng.choice([0, 1], n, p=[0.7, 0.3]),
        "Date": pd.date_range(start="2023-01-01", periods=n, freq="D"),
    }
    df = pd.DataFrame(data)

    # Introduce some nulls for realism. Sampling without replacement makes
    # the number of missing values exact instead of "at most 15".
    n_missing = min(15, n)
    if n_missing:
        missing_rows = rng.choice(df.index, size=n_missing, replace=False)
        df.loc[missing_rows, "Income"] = np.nan
    return df

# === LOAD DATA ===
# The active dataset lives in st.session_state['df'] so it survives reruns.
if st.button("🧪 Load Demo Dataset"):
    st.session_state['df'] = create_demo_data()
    st.success("✅ Demo dataset loaded!")

uploaded_file = st.file_uploader("📂 Upload your CSV file", type=["csv"])

if uploaded_file is not None:
    # The uploader keeps returning the same file on every rerun. Parse it only
    # when it is new, so a later "Load Demo Dataset" click is not immediately
    # overwritten by the still-attached upload.
    upload_signature = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get('upload_signature') != upload_signature:
        try:
            df = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # Report unreadable files instead of crashing the whole page.
            st.error(f"❌ Could not read the CSV file: {exc}")
        else:
            st.session_state['df'] = df
            st.session_state['upload_signature'] = upload_signature
            st.success("✅ File uploaded successfully.")
else:
    # Forget the previous upload so re-attaching the same file loads it again.
    st.session_state.pop('upload_signature', None)

# Nothing to visualize yet: show a hint and end this script run.
if 'df' not in st.session_state:
    st.info("👆 Upload a CSV or click 'Load Demo Dataset' to begin.")
    st.stop()

df = st.session_state['df']

# Show data preview (collapsed by default).
with st.expander("🔍 Data Preview (first 10 rows)"):
    st.dataframe(df.head(10))

# Basic info: row count, column count, and total number of missing cells.
st.subheader("📌 Dataset Info")
n_rows, n_columns = df.shape
# The double sum yields a NumPy integer; convert it to a plain int so
# st.metric receives a built-in type.
n_missing = int(df.isnull().sum().sum())

col1, col2, col3 = st.columns(3)
col1.metric("Rows", n_rows)
col2.metric("Columns", n_columns)
col3.metric("Missing Values", n_missing)

# === AUTO-VISUALIZATION RECOMMENDATIONS ===
st.header("✨ Smart Visualization Suggestions")

# Split the columns by dtype; the rest of the script picks axes from these lists.
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
datetime_cols = df.select_dtypes(include=['datetime', 'datetime64']).columns.tolist()

# The first datetime column (if any) is used as the time axis.
date_col = datetime_cols[0] if datetime_cols else None

# Suggest visualizations based on data types. Each suggestion is a dict with
# "name", "description", "plot_type" and the column(s) that plot would use.
suggestions = []

if len(numeric_cols) >= 2:
    suggestions.append({
        "name": "Scatter Plot",
        "description": "Visualize relationship between two numeric variables",
        "plot_type": "scatter",
        "x": numeric_cols[0],
        "y": numeric_cols[1],
    })

if numeric_cols:
    suggestions.append({
        "name": "Histogram",
        "description": "Distribution of a numeric variable",
        "plot_type": "histogram",
        "x": numeric_cols[0],
    })

if categorical_cols and numeric_cols:
    suggestions.append({
        "name": "Bar Plot (Mean)",
        "description": "Compare mean of numeric variable across categories",
        "plot_type": "bar",
        "x": categorical_cols[0],
        "y": numeric_cols[0],
    })

if len(categorical_cols) >= 2:
    suggestions.append({
        "name": "Stacked Bar Plot",
        "description": "Relationship between two categorical variables",
        "plot_type": "stacked_bar",
        "x": categorical_cols[0],
        "color": categorical_cols[1],
    })

if date_col and numeric_cols:
    suggestions.append({
        "name": "Time Series Line Plot",
        "description": "Trend of numeric variable over time",
        "plot_type": "line",
        "x": date_col,
        "y": numeric_cols[0],
    })

if len(numeric_cols) >= 3:
    suggestions.append({
        "name": "Scatter Plot with Color",
        "description": "Scatter plot with third variable as color",
        "plot_type": "scatter_color",
        "x": numeric_cols[0],
        "y": numeric_cols[1],
        "color": numeric_cols[2],
    })

# A box plot uses a single numeric column, so one is enough; the grouping
# column is optional and stays None when there is no categorical column.
if numeric_cols:
    suggestions.append({
        "name": "Box Plot",
        "description": "Distribution and outliers of numeric variable by category",
        "plot_type": "box",
        "x": categorical_cols[0] if categorical_cols else None,
        "y": numeric_cols[0],
    })

if len(numeric_cols) >= 2:
    suggestions.append({
        "name": "Correlation Heatmap",
        "description": "Correlation matrix of numeric variables",
        "plot_type": "heatmap",
        "cols": numeric_cols[:5],  # Limit to 5 columns for readability
    })

def _build_suggestion_figure(data, suggestion):
    """Return the Plotly figure described by a suggestion dict.

    Args:
        data: DataFrame holding the columns named in the suggestion.
        suggestion: Dict with a "plot_type" key plus the column keys that
            plot type uses ("x", "y", "color" or "cols").

    Returns:
        A Plotly figure, or None when the plot type is not recognised.
    """
    kind = suggestion["plot_type"]
    title = suggestion["name"]

    if kind == "scatter":
        return px.scatter(data, x=suggestion["x"], y=suggestion["y"], title=title)
    if kind == "scatter_color":
        return px.scatter(
            data, x=suggestion["x"], y=suggestion["y"], color=suggestion["color"], title=title
        )
    if kind == "histogram":
        return px.histogram(data, x=suggestion["x"], title=title)
    if kind == "bar":
        # The suggestion promises the mean per category, so aggregate first.
        means = data.groupby(suggestion["x"])[suggestion["y"]].mean().reset_index()
        return px.bar(means, x=suggestion["x"], y=suggestion["y"], title=title)
    if kind == "stacked_bar":
        # Row counts per category, split by the second categorical column.
        return px.histogram(data, x=suggestion["x"], color=suggestion["color"], title=title)
    if kind == "line":
        return px.line(data, x=suggestion["x"], y=suggestion["y"], title=title)
    if kind == "box":
        return px.box(data, x=suggestion.get("x"), y=suggestion["y"], title=title)
    if kind == "heatmap":
        return px.imshow(
            data[suggestion["cols"]].corr(),
            text_auto=".2f",
            aspect="auto",
            title=title,
            color_continuous_scale='RdBu_r',
        )
    return None


# Display suggestions
for i, suggestion in enumerate(suggestions):
    with st.expander(f"🎨 Suggestion {i+1}: {suggestion['name']}"):
        st.write(suggestion["description"])
        if st.button(f"Create {suggestion['name']}", key=f"sug_{i}"):
            # Remember the choice so the chart stays visible on later reruns.
            st.session_state['selected_suggestion'] = suggestion

# Render the chosen suggestion. The membership test skips a stale selection
# whose columns no longer match the current dataset's suggestions.
selected_suggestion = st.session_state.get('selected_suggestion')
if selected_suggestion in suggestions:
    suggestion_fig = _build_suggestion_figure(df, selected_suggestion)
    if suggestion_fig is not None:
        st.plotly_chart(suggestion_fig, use_container_width=True)

# === CUSTOM VISUALIZATION BUILDER ===
st.header("🛠️ Custom Visualization Builder")

plot_types = [
    "Scatter Plot",
    "Line Plot",
    "Bar Plot",
    "Histogram",
    "Box Plot",
    "Violin Plot",
    "Pie Chart",
    "Heatmap (Correlation)"
]

selected_plot = st.selectbox("Choose plot type:", plot_types)

# Stays None unless one of the "Generate ..." buttons below is clicked in this run.
fig = None

if selected_plot == "Scatter Plot":
    if not numeric_cols:
        st.warning("No numeric columns available for scatter plot.")
    else:
        col1, col2 = st.columns(2)
        with col1:
            x_col = st.selectbox("X-axis:", numeric_cols)
        with col2:
            # Offer a different column for Y whenever more than one exists.
            y_options = [col for col in numeric_cols if col != x_col] if len(numeric_cols) > 1 else numeric_cols
            y_col = st.selectbox("Y-axis:", y_options)

        color_col = st.selectbox("Color by (optional):", [None] + categorical_cols + numeric_cols, key="scatter_color")
        size_col = st.selectbox("Size by (optional):", [None] + numeric_cols, key="scatter_size")

        title = st.text_input("Plot title:", f"{y_col} vs {x_col}")

        if st.button("Generate Scatter Plot"):
            plot_df = df
            if size_col is not None:
                # Marker sizes must be non-negative numbers. The comparison is
                # False for NaN, so missing values are filtered out as well.
                plot_df = df[df[size_col] >= 0]
                skipped = len(df) - len(plot_df)
                if skipped:
                    st.info(f"Skipped {skipped} row(s) with missing or negative '{size_col}' values.")
            fig = px.scatter(plot_df, x=x_col, y=y_col, color=color_col, size=size_col, title=title)

elif selected_plot == "Line Plot":
    if not datetime_cols and not categorical_cols:
        st.warning("No suitable columns for line plot. Need datetime or categorical x-axis.")
    elif not numeric_cols:
        st.warning("No numeric columns available for line plot.")
    else:
        # Datetime columns are listed first so they become the default X axis.
        available_x = datetime_cols + categorical_cols
        col1, col2 = st.columns(2)
        with col1:
            x_col = st.selectbox("X-axis:", available_x)
        with col2:
            y_col = st.selectbox("Y-axis:", numeric_cols)

        color_col = st.selectbox("Color by (optional):", [None] + categorical_cols, key="line_color")
        title = st.text_input("Plot title:", f"{y_col} over {x_col}")

        if st.button("Generate Line Plot"):
            fig = px.line(df, x=x_col, y=y_col, color=color_col, title=title, markers=True)

elif selected_plot == "Bar Plot":
    if not categorical_cols:
        st.warning("No categorical columns available for bar plot.")
    else:
        col1, col2 = st.columns(2)
        with col1:
            x_col = st.selectbox("Category column:", categorical_cols)
        with col2:
            y_col = st.selectbox("Value column:", numeric_cols)

        agg_func = st.selectbox("Aggregation:", ["Mean", "Sum", "Count", "Median"])
        color_col = st.selectbox("Color by (optional):", [None] + categorical_cols, key="bar_color")
        title = st.text_input("Plot title:", f"{agg_func} of {y_col} by {x_col}")

        if st.button("Generate Bar Plot"):
            if agg_func == "Count":
                # Counting rows needs no value column; the histogram does it.
                fig = px.histogram(df, x=x_col, color=color_col, title=title)
            elif y_col is None:
                st.warning("No numeric columns available to aggregate.")
            else:
                # Aggregate before plotting so each bar shows the selected
                # statistic. The color column must be one of the group keys,
                # otherwise it is missing from the aggregated frame.
                split_by_color = color_col is not None and color_col != x_col
                group_keys = [x_col, color_col] if split_by_color else [x_col]
                fig_data = df.groupby(group_keys)[y_col].agg(agg_func.lower()).reset_index()
                fig = px.bar(fig_data, x=x_col, y=y_col, color=color_col, title=title)
                if split_by_color:
                    # Side-by-side bars; stacking means or medians would mislead.
                    fig.update_layout(barmode="group")

elif selected_plot == "Histogram":
    if not numeric_cols:
        st.warning("No numeric columns available for histogram.")
    else:
        col1, col2 = st.columns(2)
        with col1:
            x_col = st.selectbox("Variable:", numeric_cols)
        with col2:
            nbins = st.slider("Number of bins:", min_value=5, max_value=100, value=30)

        color_col = st.selectbox("Color by (optional):", [None] + categorical_cols, key="hist_color")
        title = st.text_input("Plot title:", f"Distribution of {x_col}")

        if st.button("Generate Histogram"):
            fig = px.histogram(df, x=x_col, nbins=nbins, color=color_col, title=title, marginal="box")

elif selected_plot == "Box Plot":
    if not numeric_cols:
        st.warning("No numeric columns available for box plot.")
    else:
        col1, col2 = st.columns(2)
        with col1:
            y_col = st.selectbox("Numeric variable:", numeric_cols)
        with col2:
            x_col = st.selectbox("Group by (optional):", [None] + categorical_cols)

        title = st.text_input("Plot title:", f"Box plot of {y_col}" + (f" by {x_col}" if x_col else ""))

        if st.button("Generate Box Plot"):
            fig = px.box(df, x=x_col, y=y_col, title=title)

elif selected_plot == "Violin Plot":
    if not numeric_cols:
        st.warning("No numeric columns available for violin plot.")
    else:
        col1, col2 = st.columns(2)
        with col1:
            y_col = st.selectbox("Numeric variable:", numeric_cols)
        with col2:
            x_col = st.selectbox("Group by (optional):", [None] + categorical_cols)

        title = st.text_input("Plot title:", f"Violin plot of {y_col}" + (f" by {x_col}" if x_col else ""))

        if st.button("Generate Violin Plot"):
            fig = px.violin(df, x=x_col, y=y_col, box=True, points="outliers", title=title)

elif selected_plot == "Pie Chart":
    if not categorical_cols:
        st.warning("No categorical columns available for pie chart.")
    else:
        col_to_plot = st.selectbox("Category column:", categorical_cols)
        title = st.text_input("Plot title:", f"Distribution of {col_to_plot}")

        if st.button("Generate Pie Chart"):
            fig = px.pie(df, names=col_to_plot, title=title)

elif selected_plot == "Heatmap (Correlation)":
    if len(numeric_cols) < 2:
        st.warning("Need at least 2 numeric columns for correlation heatmap.")
    else:
        # Preselect at most the first five numeric columns.
        selected_cols = st.multiselect("Select columns for correlation:", numeric_cols, default=numeric_cols[:5])

        if len(selected_cols) < 2:
            st.warning("Please select at least 2 columns.")
        else:
            title = st.text_input("Plot title:", "Correlation Heatmap")

            if st.button("Generate Heatmap"):
                corr_matrix = df[selected_cols].corr()
                fig = px.imshow(corr_matrix,
                               text_auto=".2f",
                               aspect="auto",
                               title=title,
                               color_continuous_scale='RdBu_r',
                               labels=dict(color="Correlation"))

# Display and download plot (only when a figure was generated in this run).
if fig is not None:
    st.plotly_chart(fig, use_container_width=True)

    # Download options
    st.subheader("💾 Download Plot")
    col1, col2 = st.columns(2)

    with col1:
        # Static image export relies on an optional rendering backend. If it
        # is missing or fails, keep the chart and the HTML download working
        # and tell the user why the PNG button is absent.
        try:
            png_data = fig.to_image(format="png", width=1200, height=800, scale=2)
        except Exception as exc:  # UI boundary: report the failure, do not crash the page
            st.warning(f"PNG export is unavailable: {exc}")
        else:
            st.download_button(
                label="Download as PNG",
                data=png_data,
                file_name="plot.png",
                mime="image/png"
            )

    with col2:
        # The exported HTML references plotly.js from the CDN instead of embedding it.
        html_data = fig.to_html(include_plotlyjs="cdn")
        st.download_button(
            label="Download as HTML",
            data=html_data,
            file_name="plot.html",
            mime="text/html"
        )

# === MULTI-PLOT COMPARISON ===
st.header("⚖️ Compare Multiple Plots")

# Only plot types with a trace builder in the loop below are offered, so the
# user cannot pick a type that would always be reported as invalid.
compare_plot_types = ["Scatter Plot", "Histogram", "Bar Plot"]

num_plots = st.slider("Number of plots to compare:", min_value=1, max_value=4, value=2)

# One row with one subplot per requested plot. A single plot is allowed too,
# because the slider permits choosing 1.
fig_compare = make_subplots(
    rows=1, cols=num_plots,
    subplot_titles=[f"Plot {i+1}" for i in range(num_plots)],
    shared_yaxes=False
)

# Flipped to False as soon as any subplot cannot be built.
plot_success = True

for i in range(num_plots):
    st.markdown(f"### Plot {i+1}")
    plot_type = st.selectbox("Plot type:", compare_plot_types, key=f"compare_type_{i}")

    try:
        if plot_type == "Scatter Plot" and len(numeric_cols) >= 2:
            x_col = st.selectbox("X-axis:", numeric_cols, key=f"compare_x_{i}")
            y_col = st.selectbox("Y-axis:", [col for col in numeric_cols if col != x_col], key=f"compare_y_{i}")
            trace = go.Scatter(x=df[x_col], y=df[y_col], mode='markers', name=f"{y_col} vs {x_col}")
            fig_compare.add_trace(trace, row=1, col=i+1)

        elif plot_type == "Histogram" and len(numeric_cols) >= 1:
            x_col = st.selectbox("Variable:", numeric_cols, key=f"compare_hist_{i}")
            trace = go.Histogram(x=df[x_col], name=f"Distribution of {x_col}")
            fig_compare.add_trace(trace, row=1, col=i+1)

        elif plot_type == "Bar Plot" and len(categorical_cols) >= 1 and len(numeric_cols) >= 1:
            x_col = st.selectbox("Category:", categorical_cols, key=f"compare_bar_x_{i}")
            y_col = st.selectbox("Value:", numeric_cols, key=f"compare_bar_y_{i}")
            # Aggregate to one bar per category (the mean) instead of
            # drawing one bar per row of the dataset.
            category_means = df.groupby(x_col)[y_col].mean()
            trace = go.Bar(x=category_means.index, y=category_means.values, name=f"Mean {y_col} by {x_col}")
            fig_compare.add_trace(trace, row=1, col=i+1)

        else:
            # The dataset lacks the column types this plot type needs.
            st.warning(f"Plot {i+1}: Invalid combination for {plot_type}")
            plot_success = False

    except Exception as e:  # best effort: report the failing subplot and carry on
        st.error(f"Error in Plot {i+1}: {e}")
        plot_success = False

if plot_success and st.button("Generate Comparison Plot"):
    fig_compare.update_layout(height=600, showlegend=True, title_text="Comparison of Multiple Plots")
    st.plotly_chart(fig_compare, use_container_width=True)

# Footer: horizontal rule followed by a one-line attribution caption.
_footer_parts = (f"© {AUTHOR}", f"License {LICENSE}", f"Contact: {EMAIL}")
st.markdown("---")
st.caption(" | ".join(_footer_parts))