Visualization Flashcards
enable inline plotting in a Jupyter notebook
%pylab inline
%matplotlib inline
label the axes of a plot
# Axis captions; string literals need straight quotes to be valid Python.
xlabel('Iteration number')
ylabel('MSE')
draw histogram
# Histogram of the 'Height' column via the pandas plotting wrapper.
data.plot(
    y='Height',
    kind='hist',
    color='red',
    title='Height (inch.) distribution',
)
draw pairplot from seaborn library
# A plain import statement is required here; '%import' is not an IPython magic.
import seaborn as sns
sns.pairplot(data)
draw boxplot
# Box plot of 'Height' grouped by the 'weight_category' column.
sns.boxplot(x='weight_category', y='Height', data=data)
draw scatterplot
# Scatter plot with 'Weight' on the x axis and 'Height' on the y axis.
data.plot(
    x='Weight',
    y='Height',
    kind='scatter',
    title='Height(inches)/Weight(pounds)',
)
create an evenly spaced grid of x values
# 100 evenly spaced points from 60 to 180, both endpoints included.
x = np.linspace(start=60, stop=180, num=100)
draw 2 lines with legend at the upper left corner of the plot
# plt.plot returns a list of lines; the trailing comma unpacks the single line.
line1, = plt.plot(x, y_1, color='magenta', label='height = 60 + 0.05 * weight ')
line2, = plt.plot(x, y_2, color='green', label='height = 50 + 0.16 * weight ')
# 'upper left' is the named equivalent of the numeric location code 2.
plt.legend(handles=[line1, line2], loc='upper left')
plt.show()
plot a function evaluated over a range of x values (NumPy array)
w1_range = np.linspace(-0.5, 1, 100)
# Sweep the second weight over w1_range, recording the error at each value.
# NOTE: this overwrites w[1] in place; w keeps the last value after the loop.
err = []
for w1 in w1_range:
    w[1] = w1
    err.append(squared_error(w, data))
plt.plot(w1_range, err)
create a figure and get a 3D axes object on it
fig = plt.figure()
# Passing projection to gca() was deprecated in matplotlib 3.4 and later removed;
# add_subplot creates the 3D axes explicitly.
ax = fig.add_subplot(projection='3d')
create grid
# Both axes use the same ticks: from -5 up to (excluding) 5 in steps of 0.25.
ticks = np.arange(-5, 5, 0.25)
X, Y = np.meshgrid(ticks, ticks)
plot the surface for X, Y, Z; label the axes
# Draw the surface on the 3D axes, then caption each axis.
surf = ax.plot_surface(X, Y, Z)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
visualize dependencies between features and target variable in pandas dataset df
# 3x4 grid of axes; every column except the last is plotted against "cnt".
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(15, 10))
for idx, feature in enumerate(df.columns[:-1]):
    # Floor division is required: idx / 4 is a float in Python 3 and
    # cannot be used to index the axes array.
    df.plot(feature, "cnt", subplots=True, kind="scatter", ax=axes[idx // 4, idx % 4])
correlation of features with target variable ‘cnt’ in df dataset
# Select every column except the last by position; DataFrame.ix was removed
# in pandas 1.0, and iloc is its positional replacement.
X_f = df.iloc[:, :-1]
X_f.corrwith(df['cnt'])
shuffle the dataset before constructing regression model, normalize features
# NOTE(review): shuffle and scale are presumably sklearn.utils.shuffle and
# sklearn.preprocessing.scale; confirm against the notebook's imports.
df_shuffled = shuffle(df, random_state=123)
# Features are every column except the last; the target is the "cnt" column.
X = scale(df_shuffled[df_shuffled.columns[:-1]])
y = df_shuffled["cnt"]