# Compare how the per-row mean of the selected columns is distributed in the
# train set versus the test set.
plt.figure(figsize=(16, 6))

# Columns at positions 2..201 of train_df; presumably the 200 feature columns
# that follow the ID and target columns -- confirm against the loaded data.
features = train_df.columns.values[2:202]

plt.title("Distribution of mean values per row in the train and test set")

# One histogram + KDE per dataset, overlaid on the same axes.
# NOTE(review): seaborn's docs mark distplot as deprecated since 0.11;
# consider histplot/displot once the installed version is confirmed.
for frame, tint, legend_name in (
    (train_df, "green", "train"),
    (test_df, "blue", "test"),
):
    row_means = frame[features].mean(axis=1)
    sns.distplot(row_means, color=tint, kde=True, bins=120, label=legend_name)

plt.legend()
plt.show()
1 2 3 4 5 6
# Compare how the per-column mean of the selected columns is distributed in
# the train set versus the test set (`features` is defined in an earlier cell).
plt.figure(figsize=(16, 6))
plt.title("Distribution of mean values per column in the train and test set")

# One histogram + KDE per dataset, overlaid on the same axes.
for frame, tint, legend_name in (
    (train_df, "magenta", "train"),
    (test_df, "darkblue", "test"),
):
    column_means = frame[features].mean(axis=0)
    sns.distplot(column_means, color=tint, kde=True, bins=120, label=legend_name)

plt.legend()
plt.show()
1 2 3 4 5
# Compare how the per-row standard deviation of the selected columns is
# distributed in the train set versus the test set (`features` is defined in
# an earlier cell).
plt.figure(figsize=(16, 6))
plt.title("Distribution of std values per row in the train and test set")

# One histogram + KDE per dataset, overlaid on the same axes.
for frame, tint, legend_name in (
    (train_df, "black", "train"),
    (test_df, "red", "test"),
):
    row_stds = frame[features].std(axis=1)
    sns.distplot(row_stds, color=tint, kde=True, bins=120, label=legend_name)

plt.legend()
plt.show()
1 2 3 4 5
# Compare how the per-column standard deviation of the selected columns is
# distributed in the train set versus the test set (`features` is defined in
# an earlier cell).
plt.figure(figsize=(16, 6))
plt.title("Distribution of std values per column in the train and test set")

# One histogram + KDE per dataset, overlaid on the same axes.
for frame, tint, legend_name in (
    (train_df, "blue", "train"),
    (test_df, "green", "test"),
):
    column_stds = frame[features].std(axis=0)
    sns.distplot(column_stds, color=tint, kde=True, bins=120, label=legend_name)

plt.legend()
plt.show()
除了拿 Training Data 與 Test Data 來比較外,我們也可以比較 Training Data 內不同 Target(0 與 1)的均值與標準差有什麼不同。
1 2 3 4 5 6 7
# Split the train set by the value of its 'target' column (0 vs 1), then
# compare how the per-row mean of the selected columns is distributed in each
# subset (`features` is defined in an earlier cell).
t0, t1 = (train_df.loc[train_df['target'] == value] for value in (0, 1))

plt.figure(figsize=(16, 6))
plt.title("Distribution of mean values per row in the train set")

# One histogram + KDE per target class, overlaid on the same axes.
for subset, tint, legend_name in (
    (t0, "red", "target = 0"),
    (t1, "blue", "target = 1"),
):
    row_means = subset[features].mean(axis=1)
    sns.distplot(row_means, color=tint, kde=True, bins=120, label=legend_name)

plt.legend()
plt.show()
1 2 3 4 5
# Compare how the per-column mean of the selected columns is distributed for
# the two target subsets of the train set (`t0`, `t1` and `features` are
# defined in earlier cells).
plt.figure(figsize=(16, 6))
plt.title("Distribution of mean values per column in the train set")

# One histogram + KDE per target class, overlaid on the same axes.
for subset, tint, legend_name in (
    (t0, "green", "target = 0"),
    (t1, "darkblue", "target = 1"),
):
    column_means = subset[features].mean(axis=0)
    sns.distplot(column_means, color=tint, kde=True, bins=120, label=legend_name)

plt.legend()
plt.show()