"""Generate summary visualizations from the ``sharks`` table of a SQLite database."""

import os
import sqlite3

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from wordcloud import WordCloud


def _save_attack_heatmap(df: pd.DataFrame, output_dir: str) -> None:
    """Save a Country x Month heatmap of row counts for the 10 most frequent countries."""
    # Restrict to the top 10 countries so the heatmap stays readable.
    top_countries = df['Country'].value_counts().nlargest(10).index
    # Only rows with a positive month value are counted.
    subset = df[df['Country'].isin(top_countries) & (df['Month'] > 0)]
    pivot_df = subset.groupby(['Country', 'Month']).size().unstack(fill_value=0)

    if pivot_df.empty:
        # There is nothing to draw when no row survives the filters.
        print("No data available for heatmap; skipping.")
        return

    fig = plt.figure(figsize=(12, 8))
    try:
        sns.heatmap(pivot_df, annot=True, fmt="d", cmap="YlOrRd")
        plt.title('Heatmap of Shark Attacks by Month and Country (Top 10)')
        plt.savefig(os.path.join(output_dir, 'heatmap_attacks.png'))
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
    print("Saved heatmap.")


def _save_species_wordcloud(df: pd.DataFrame, output_dir: str) -> None:
    """Save a word cloud built from the species column, if one is present."""
    # The column may be named either 'Species' or 'Species_'.
    species_col = next(
        (name for name in ('Species', 'Species_') if name in df.columns),
        None,
    )
    if species_col is None:
        print("Species column not found for WordCloud.")
        return

    text = " ".join(df[species_col].dropna().astype(str))
    if not text.strip():
        # WordCloud.generate() raises ValueError when given no words at all.
        print("No species text available for WordCloud; skipping.")
        return

    wordcloud = WordCloud(
        width=800, height=400, background_color='white'
    ).generate(text)

    fig = plt.figure(figsize=(15, 7))
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.title('WordCloud of Shark Species')
        plt.savefig(os.path.join(output_dir, 'wordcloud_species.png'))
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
    print("Saved WordCloud.")


def generate_visuals(db_name: str, output_dir: str = 'outputs') -> None:
    """Read the ``sharks`` table and write visualization PNGs to *output_dir*.

    Two images are produced when the data allows it:

    * ``heatmap_attacks.png`` -- row counts by month for the ten most
      frequent values of ``Country``.
    * ``wordcloud_species.png`` -- word cloud of the ``Species`` (or
      ``Species_``) column.

    Args:
        db_name: Path to the SQLite database file containing a ``sharks`` table.
        output_dir: Directory that receives the PNG files; created if missing.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    conn = sqlite3.connect(db_name)
    try:
        df = pd.read_sql('SELECT * FROM sharks', conn)
    finally:
        # Close explicitly: a sqlite3 connection used as a context manager
        # only commits/rolls back, it does not close the connection.
        conn.close()

    print("Generating visualizations...")

    # 1. Heatmap of attacks by month and country.
    _save_attack_heatmap(df, output_dir)

    # 2. WordCloud of shark species.
    _save_species_wordcloud(df, output_dir)


if __name__ == "__main__":
    generate_visuals('master_sharks.db')