# File size: 1,211 Bytes
# bde793d
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
def _save_and_close(fig, filename):
    """Write *fig* to *filename*, then close it even if saving fails."""
    try:
        fig.savefig(filename)
    finally:
        # pyplot keeps every figure alive in a global registry until it is
        # closed; without this, each call to explore_data leaks three figures.
        plt.close(fig)


def explore_data(file_path):
    """Print a summary of a CSV dataset and save three exploratory plots.

    The file is loaded with ``pd.read_csv``. The function prints the frame's
    shape, its column names, the per-column missing-value counts and the
    value counts of the ``best`` column. It writes three PNG files, using
    relative names (so they land in the current working directory):

    * ``engine_distribution.png`` - count plot of the ``engine`` column
    * ``best_distribution.png``   - bar chart of ``best`` value counts
    * ``p_correlation.png``       - correlation heatmap of ``p1`` .. ``p10``

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``. The data must provide the columns ``engine``,
            ``best`` and ``p1`` through ``p10``.

    Returns:
        None. Results are printed and written to disk.
    """
    df = pd.read_csv(file_path)

    print("Dataset Shape:", df.shape)
    print("\nColumns:", df.columns.tolist())

    # Check for missing values
    print("\nMissing Values:\n", df.isnull().sum())

    # Engine distribution. Axes are passed explicitly so the plot does not
    # depend on whichever figure happens to be "current" in pyplot.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(x='engine', data=df, ax=ax)
    ax.set_title('Distribution of Chatbot Engines')
    _save_and_close(fig, 'engine_distribution.png')

    # Performance distribution (Best/Worst). Computed once and reused for
    # both the bar chart and the printed summary below.
    best_counts = df['best'].value_counts()
    fig, ax = plt.subplots(figsize=(10, 6))
    best_counts.plot(kind='bar', ax=ax)
    ax.set_title('Distribution of "Best" Label')
    _save_and_close(fig, 'best_distribution.png')

    # p1-p10 correlation
    p_cols = [f'p{i}' for i in range(1, 11)]
    # NOTE(review): astype(int) raises if any p-column holds NaN or
    # non-numeric text - confirm p1..p10 are always fully populated.
    p_corr = df[p_cols].astype(int).corr()
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(p_corr, annot=True, cmap='coolwarm', ax=ax)
    ax.set_title('Correlation between Evaluation Parameters (p1-p10)')
    _save_and_close(fig, 'p_correlation.png')

    print("\nTarget Variable 'best' counts:")
    print(best_counts)
# Run the exploration on BP_MHS_V1.csv when this file is executed as a script.
if __name__ == "__main__":
    explore_data(file_path='BP_MHS_V1.csv')
|