Spaces:
Runtime error
Runtime error
| import base64 | |
| import io | |
| from collections import Counter | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
def flatten_list_column(data, column):
    """Count how often each item occurs across the list-valued cells of a column.

    Cells of ``data[column]`` that are not ``list`` instances (e.g. ``None``
    or scalars) are skipped.

    Args:
        data: Any object where ``data[column]`` yields an iterable of cells
            (a pandas DataFrame or a plain dict of lists both work).
        column: Key/name of the column to flatten.

    Returns:
        pandas.Series: Index holds the distinct items in first-seen order,
        values hold their integer counts. With no list cells at all the
        result is an empty ``int64`` Series.
    """
    item_counts = Counter()
    for cell in data[column]:
        if isinstance(cell, list):
            # Counter.update tallies each element of the iterable in place,
            # so no intermediate flattened list is needed.
            item_counts.update(cell)

    # An explicit dtype keeps the empty case an integer count series instead
    # of letting pandas fall back to its default dtype for empty data.
    return pd.Series(item_counts, dtype="int64")
def create_distribution_plot(data, column):
    """Plot the distribution of ``data[column]`` and return it as a base64 PNG.

    The chart type is picked from the column's content:

    * first value is a ``list`` -> bar chart of the flattened item counts
      (computed by ``flatten_list_column``);
    * dtype ``int64`` / ``float64`` -> 30-bin histogram;
    * anything else -> bar chart of ``value_counts()``.

    Bar charts get their x tick labels rotated by -45 degrees.

    Args:
        data: Table-like object indexed by column name. Non-list columns
            must expose ``.dtype`` and ``.value_counts()`` (presumably a
            pandas DataFrame -- confirm against callers).
        column: Name of the column to plot.

    Returns:
        str: The PNG bytes produced by ``fig.to_image`` (1200x800 layout,
        ``scale=2.0``), base64-encoded and decoded to ``str``.

    Raises:
        Exception: Any failure is printed to stdout and re-raised unchanged.
    """
    numeric_dtypes = ["int64", "float64"]
    try:
        series = data[column]
        # Probe the first value without assuming the column is non-empty;
        # an empty column is treated as a regular (non-list) column.
        first_value = next(iter(series), None)

        # ``value_counts`` stays None only for the numeric/histogram path.
        value_counts = None
        if isinstance(first_value, list):
            print(f"Processing list column: {column}")
            value_counts = flatten_list_column(data, column)
        elif series.dtype not in numeric_dtypes:
            # Categorical data
            value_counts = series.value_counts()

        if value_counts is None:
            # Continuous data - use histogram
            fig = go.Figure()
            fig.add_trace(
                go.Histogram(
                    x=series,
                    name="Count",
                    nbinsx=30,
                    marker=dict(
                        color="rgba(110, 68, 255, 0.7)",
                        line=dict(color="rgba(184, 146, 255, 1)", width=1),
                    ),
                )
            )
        else:
            # List columns and categorical data share the bar-chart rendering;
            # bars are coloured by their own height.
            fig = go.Figure(
                [
                    go.Bar(
                        x=value_counts.index,
                        y=value_counts.values,
                        marker=dict(
                            color=value_counts.values,
                            colorscale=px.colors.sequential.Plotly3,
                        ),
                    )
                ]
            )

        # Common layout updates
        fig.update_layout(
            title=f"Distribution of {column}",
            xaxis_title=column,
            yaxis_title="Count",
            template="plotly_white",
            margin=dict(t=50, l=50, r=50, b=50),
            width=1200,
            height=800,
            showlegend=False,
        )

        # Category labels can be long, so tilt them on bar charts only.
        if value_counts is not None:
            fig.update_layout(xaxis_tickangle=-45)

        # Convert to PNG, then encode to base64 text.
        img_bytes = fig.to_image(format="png", scale=2.0)
        return base64.b64encode(img_bytes).decode()
    except Exception as e:
        print(f"Error creating distribution plot for {column}: {str(e)}")
        raise
def create_wordcloud(data, column):
    """Render a word cloud for ``data[column]`` and return it as a base64 PNG.

    If the first value of the column is a ``list``, every list cell has its
    items stringified and joined with spaces (non-list cells are skipped).
    Otherwise the whole column is cast to ``str`` and joined with spaces.

    NOTE(review): another ``create_wordcloud`` is defined further down in
    this file; being later, that definition replaces this one once the
    module is imported.

    Args:
        data: Table-like object indexed by column name; non-list columns
            must support ``.astype(str)``.
        column: Name of the column whose text feeds the word cloud.

    Returns:
        str: PNG image (saved at 300 dpi with a tight bounding box),
        base64-encoded and decoded to ``str``.

    Raises:
        Exception: Any failure is printed to stdout and re-raised unchanged.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    try:
        series = data[column]
        # Probe the first value without assuming the column is non-empty.
        first_value = next(iter(series), None)

        if isinstance(first_value, list):
            # Handle list columns
            text = " ".join(
                " ".join(map(str, sublist))
                for sublist in series
                if isinstance(sublist, list)
            )
        else:
            # Handle regular columns
            text = " ".join(series.astype(str))

        wordcloud = WordCloud(
            width=1200,
            height=800,
            background_color="white",
            colormap="plasma",
            max_words=100,
        ).generate(text)

        # Create matplotlib figure; close it even when rendering or saving
        # fails so figures do not pile up in pyplot's global registry.
        fig = plt.figure(figsize=(10, 5))
        try:
            plt.imshow(wordcloud, interpolation="bilinear")
            plt.axis("off")
            plt.title(f"Word Cloud for {column}")

            # Save to bytes
            buf = io.BytesIO()
            plt.savefig(buf, format="png", bbox_inches="tight", dpi=300)
        finally:
            plt.close(fig)

        # Convert to base64 (getvalue() ignores the stream position).
        return base64.b64encode(buf.getvalue()).decode()
    except Exception as e:
        print(f"Error creating word cloud for {column}: {str(e)}")
        raise
def create_wordcloud(data, column):
    """Render a word cloud for ``data[column]`` and return it as a base64 PNG.

    NOTE(review): this is the second ``create_wordcloud`` in this file and,
    being later, it is the one bound to the name after import. It now handles
    list-valued columns the same way the earlier definition does; the two
    should eventually be merged into one.

    If the first value of the column is a ``list``, every list cell has its
    items stringified and joined with spaces (non-list cells are skipped).
    Otherwise the whole column is cast to ``str`` and joined with spaces.

    Args:
        data: Table-like object indexed by column name; non-list columns
            must support ``.astype(str)``.
        column: Name of the column whose text feeds the word cloud.

    Returns:
        str: PNG image (800x400 cloud, saved at 300 dpi with a tight
        bounding box), base64-encoded and decoded to ``str``.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    series = data[column]
    # Probe the first value without assuming the column is non-empty or that
    # it offers positional indexing.
    first_value = next(iter(series), None)

    if isinstance(first_value, list):
        # Join the items themselves rather than the lists' Python repr.
        text = " ".join(
            " ".join(map(str, sublist))
            for sublist in series
            if isinstance(sublist, list)
        )
    else:
        text = " ".join(series.astype(str))

    # Generate word cloud
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color="white",
        colormap="plasma",
        max_words=100,
    ).generate(text)

    # Create matplotlib figure; close it even when rendering or saving fails
    # so figures do not pile up in pyplot's global registry.
    fig = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.title(f"Word Cloud for {column}")

        # Save to bytes
        buf = io.BytesIO()
        plt.savefig(buf, format="png", bbox_inches="tight", dpi=300)
    finally:
        plt.close(fig)

    # Convert to base64 (getvalue() ignores the stream position).
    return base64.b64encode(buf.getvalue()).decode()