|
|
|
|
|
import traceback

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import seaborn as sns
|
|
|
|
|
def create_visualizations():
    """Create and save overview charts for the cleaned TikTok dataset.

    Reads ``tiktok_cleaned.csv`` (relative to the current working directory)
    and draws a 2x2 figure containing:

    * a histogram of ``digg_count`` (likes),
    * a histogram of ``play_count`` (views),
    * a scatter plot of views against likes,
    * a bar chart of the mean ``digg_count``, ``comment_count`` and
      ``share_count``.

    The figure is written to ``tiktok_analysis_visualizations.png``, shown
    with ``plt.show()`` and then closed. When the data has a ``duration``
    column, ``create_duration_visualizations`` is called with the same frame.

    Returns:
        None. Any exception raised along the way is printed together with
        its traceback instead of being propagated to the caller.
    """
    try:
        df = pl.read_csv('tiktok_cleaned.csv')

        plt.style.use('default')
        sns.set_palette("husl")

        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        try:
            fig.suptitle('TikTok Dataset Analysis', fontsize=16, fontweight='bold')

            # Top row: one histogram per raw count column. Nulls are dropped
            # per column because to_list() would turn them into None, which
            # matplotlib cannot bin.
            histogram_specs = [
                (axes[0, 0], 'digg_count',
                 'Distribution of Video Likes (Digg Count)', 'Number of Likes'),
                (axes[0, 1], 'play_count',
                 'Distribution of Video Views (Play Count)', 'Number of Views'),
            ]
            for ax, column, title, xlabel in histogram_specs:
                values = df[column].drop_nulls().to_list()
                ax.hist(values, bins=50, alpha=0.7, edgecolor='black')
                ax.set_title(title)
                ax.set_xlabel(xlabel)
                ax.set_ylabel('Frequency')
                ax.grid(True, alpha=0.3)

            # Bottom-left: views vs likes. Nulls are dropped row-wise here so
            # every x value stays paired with the y value from the same row.
            paired = df.select(['play_count', 'digg_count']).drop_nulls()
            scatter_ax = axes[1, 0]
            scatter_ax.scatter(
                paired['play_count'].to_list(),
                paired['digg_count'].to_list(),
                alpha=0.6,
            )
            scatter_ax.set_title('Views vs Likes Correlation')
            scatter_ax.set_xlabel('Views (Play Count)')
            scatter_ax.set_ylabel('Likes (Digg Count)')
            scatter_ax.grid(True, alpha=0.3)

            # Bottom-right: mean of each engagement column as a bar chart.
            engagement_metrics = ['digg_count', 'comment_count', 'share_count']
            avg_engagement = [df[metric].mean() for metric in engagement_metrics]

            bar_ax = axes[1, 1]
            bars = bar_ax.bar(['Likes', 'Comments', 'Shares'], avg_engagement)
            bar_ax.set_title('Average Engagement Metrics')
            bar_ax.set_ylabel('Average Count')

            # Print each bar's value just above it, centred horizontally.
            for bar in bars:
                height = bar.get_height()
                bar_ax.text(
                    bar.get_x() + bar.get_width() / 2.,
                    height,
                    f'{height:,.0f}',
                    ha='center',
                    va='bottom',
                )

            fig.tight_layout()
            # Save before show(): once an interactive window is closed the
            # figure may no longer be available for saving.
            fig.savefig('tiktok_analysis_visualizations.png', dpi=300, bbox_inches='tight')
            plt.show()
        finally:
            # Release the figure even if plotting or saving failed; pyplot
            # otherwise keeps every figure it created alive.
            plt.close(fig)

        # NOTE(review): the leading 'π' looks like a mis-encoded emoji -
        # confirm the intended character before changing this message.
        print("π Visualizations saved as 'tiktok_analysis_visualizations.png'")

        # The duration charts are optional: only drawn when the column exists.
        if 'duration' in df.columns:
            create_duration_visualizations(df)

    except Exception as e:
        # Script-level boundary: report the failure and its traceback rather
        # than letting the exception escape.
        print(f"Error creating visualizations: {e}")
        traceback.print_exc()
|
|
|
|
|
def create_duration_visualizations(df):
    """Create and save charts relating video duration to likes.

    Draws a 1x2 figure: a histogram of the ``duration`` column and a scatter
    plot of ``duration`` against ``digg_count``. The figure is written to
    ``duration_analysis.png``, shown with ``plt.show()`` and then closed.

    Args:
        df: Table providing ``duration`` and ``digg_count`` columns. It is
            accessed through ``df[...]``, ``select``, ``drop_nulls`` and
            ``to_list``, i.e. the frame ``create_visualizations`` obtains
            from ``pl.read_csv``.

    Returns:
        None. Exceptions are not handled here and propagate to the caller.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    try:
        hist_ax, scatter_ax = axes

        # Nulls are dropped because to_list() would turn them into None,
        # which matplotlib cannot bin.
        duration_data = df['duration'].drop_nulls().to_list()
        hist_ax.hist(duration_data, bins=30, alpha=0.7, edgecolor='black')
        hist_ax.set_title('Distribution of Video Duration')
        hist_ax.set_xlabel('Duration (seconds)')
        hist_ax.set_ylabel('Frequency')
        hist_ax.grid(True, alpha=0.3)

        # For the scatter, drop nulls row-wise so each duration stays paired
        # with the like count from the same row.
        paired = df.select(['duration', 'digg_count']).drop_nulls()
        scatter_ax.scatter(
            paired['duration'].to_list(),
            paired['digg_count'].to_list(),
            alpha=0.6,
        )
        scatter_ax.set_title('Duration vs Likes')
        scatter_ax.set_xlabel('Duration (seconds)')
        scatter_ax.set_ylabel('Likes (Digg Count)')
        scatter_ax.grid(True, alpha=0.3)

        fig.tight_layout()
        # Save before show(): once an interactive window is closed the
        # figure may no longer be available for saving.
        fig.savefig('duration_analysis.png', dpi=300, bbox_inches='tight')
        plt.show()
    finally:
        # Release the figure even if plotting or saving failed; pyplot
        # otherwise keeps every figure it created alive.
        plt.close(fig)

    # NOTE(review): the leading 'π' looks like a mis-encoded emoji -
    # confirm the intended character before changing this message.
    print("π Duration visualizations saved as 'duration_analysis.png'")
|
|
|
|
|
if __name__ == "__main__":
    # Run the full chart pipeline only when this file is executed as a
    # script, not when it is imported as a module.
    create_visualizations()