Visualization Flashcards

Question 1

Q

write the magic command that enables inline plotting in a notebook

Answer

A

%pylab inline

%matplotlib inline

Question 2

Q

label the axes of a plot

Answer

A

# Axis captions; string literals need plain ASCII quotes to be valid Python.
xlabel('Iteration number')

ylabel('MSE')

Question 3

Q

draw histogram

Answer

A

# Histogram of the 'Height' column, drawn in red with a title.
data.plot(y='Height', kind='hist',
          color='red', title='Height (inch.) distribution')

Question 4

Q

draw pairplot from seaborn library

Answer

A

# A plain import statement: there is no "%import" magic in IPython.
import seaborn as sns

sns.pairplot(data)

Question 5

Q

draw boxplot

Answer

A

# One box of 'Height' values per 'weight_category' value.
sns.boxplot(x='weight_category', y='Height', data=data)

Question 6

Q

draw scatterplot

Answer

A

# Scatter plot with 'Weight' on the x axis and 'Height' on the y axis.
data.plot('Weight', 'Height', kind='scatter', title='Height(inches)/Weight(pounds)')

Question 7

Q

create evenly spaced points along an axis

Answer

A

x = np.linspace(60, 180, 100)

Question 8

Q

draw 2 lines with legend at the upper left corner of the plot

Answer

A

line1, = plt.plot(x, y_1, color='magenta',label = 'height = 60 + 0.05 * weight ')
line2, = plt.plot(x, y_2, color='green',label = 'height = 50 + 0.16 * weight ')

plt. legend(handles=[line1,line2], loc=2)
plt. show()

Question 9

Q

draw plot by applying function to range of X (np array)

Answer

A

w1_range = np.linspace(-0.5, 1, 100)
err = [squared_error(w, data) for w[1] in w1_range]

plt.plot(w1_range, err)

Question 10

Q

get current axis

Answer

A

fig = plt.figure()
ax = fig.gca(projection='3d')

Question 11

Q

create grid

Answer

A

X = np.arange(-5, 5, 0.25)
Y = np.arange(-5, 5, 0.25)
X, Y = np.meshgrid(X, Y)

Question 12

Q

plot surface for X, Y, Z; label the axes

Answer

A

# Draw the surface on the 3D axes, then caption each of the three axes.
surf = ax.plot_surface(X, Y, Z)

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()

Question 13

Q

visualize dependencies between features and target variable in pandas dataset df

Answer

A

# 3x4 grid of scatter plots: every column except the last one against "cnt".
fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(15, 10))
for idx, feature in enumerate(df.columns[:-1]):
    # divmod keeps both indices integers; "idx / 4" is a float on Python 3
    # and cannot be used to index the axes array.
    row, col = divmod(idx, 4)
    df.plot(feature, "cnt", subplots=True, kind="scatter", ax=axes[row, col])

Question 14

Q

correlation of features with target variable ‘cnt’ in df dataset

Answer

A

# All columns except the last one, selected by position.
# (.iloc replaces the .ix indexer, which no longer exists in pandas.)
X_f = df.iloc[:, :-1]

# Correlation of each selected column with the 'cnt' column.
X_f.corrwith(df['cnt'])

Question 15

Q

shuffle the dataset before constructing regression model, normalize features

Answer

A

df_shuffled = shuffle(df, random_state=123)
X = scale(df_shuffled[df_shuffled.columns[:-1]])
y = df_shuffled["cnt"]

Question 16

Q

plot weights of model’s parameters depending on regularization parameter alpha

Answer

A

plt.figure(figsize=(8, 5))
# One curve per feature: its weight as a function of alpha, in a random colour.
for coef, feature in zip(coefs_lasso.T, df.columns):
    plt.plot(alphas, coef, label=feature, color=np.random.rand(3))
# Anchor the legend outside the axes so it does not cover the curves.
plt.legend(loc="upper right", bbox_to_anchor=(1.4, 0.95))
plt.xlabel("alpha")
plt.ylabel("feature weight")
plt.title("Lasso")

Question 17

Q

plot scatterplot of
  classification_problem
[2 features, 3 classes] using 
1)map and lambda functions 
2)direct indexing
3)list comprehension

Answer

A

pylab.figure(figsize=(8,6))
1)
pylab.scatter(map(lambda x: x[0], classification_problem[0]), map(lambda x: x[1], classification_problem[0]),
c=classification_problem[1], cmap=colors, s=100)

2)
pylab.scatter(classification_problem[0][:,0], classification_problem[0][:,1],
c=classification_problem[1], cmap=colors, s=100)

3)
pylab.scatter([x[0] for x in classification_problem[0]],[x[1] for x in classification_problem[0]],
c=classification_problem[1], cmap=colors, s=100)

Question 18

Q

Train tree classifier, plot decision surface and data points

Answer

A

from sklearn import cross_validation, datasets, metrics, tree

#1)Split the data into train and test sets
train_data, test_data, train_labels, test_labels = cross_validation.train_test_split(classification_problem[0], 
                                                                                     classification_problem[1], 
                                                                                     test_size = 0.3,
                                                                                     random_state = 1)

#2)Ancillary function for defining grid
def get_meshgrid(data, step=.05, border=.5,):
    """Build coordinate matrices covering the first two columns of ``data``.

    The grid spans from ``min - border`` up to (but excluding)
    ``max + border`` along each of the two columns, sampled every ``step``.

    Args:
        data: 2-D array; only columns 0 and 1 are read.
        step: spacing between neighbouring grid points.
        border: margin added around the data range on every side.

    Returns:
        The result of ``np.meshgrid`` for the x and y sample points.
    """
    first, second = data[:, 0], data[:, 1]
    x_points = np.arange(first.min() - border, first.max() + border, step)
    y_points = np.arange(second.min() - border, second.max() + border, step)
    return np.meshgrid(x_points, y_points)

#3)Function to plot surface and points 
def plot_decision_surface(estimator, train_data, train_labels, test_data, test_labels, 
                          colors = colors, light_colors = light_colors):
    """Fit ``estimator`` and plot its decision surface with train and test points.

    Draws one 16x6 figure with two side-by-side subplots. Both show the same
    mesh of predictions, computed once over the grid returned by
    ``get_meshgrid(train_data)``. The left subplot overlays the training
    points, the right one the test points. Each title reports the accuracy
    on the corresponding set.

    Args:
        estimator: object exposing ``fit(X, y)`` and ``predict(X)``.
        train_data: 2-D array; columns 0 and 1 are plotted.
        train_labels: labels used for fitting and for colouring train points.
        test_data: 2-D array; columns 0 and 1 are plotted.
        test_labels: labels used for colouring test points.
        colors: colormap for the data points.
        light_colors: colormap for the decision surface.
    """
    #fit model
    estimator.fit(train_data, train_labels)

    #set figure size
    pyplot.figure(figsize = (16, 6))

    #plot decision surface on the train data 
    pyplot.subplot(1,2,1)
    xx, yy = get_meshgrid(train_data)
    # Predict a class for every grid node, then fold back into the grid shape.
    mesh_predictions = np.array(estimator.predict(np.c_[xx.ravel(), yy.ravel()])).reshape(xx.shape)
    pyplot.pcolormesh(xx, yy, mesh_predictions, cmap = light_colors)
    pyplot.scatter(train_data[:, 0], train_data[:, 1], c = train_labels, s = 100, cmap = colors)
    pyplot.title('Train data, accuracy={:.2f}'.format(metrics.accuracy_score(train_labels, estimator.predict(train_data))))

    #plot decision surface on the test data
    # This half must stay inside the function: it reuses xx, yy and
    # mesh_predictions computed above.
    pyplot.subplot(1,2,2)
    pyplot.pcolormesh(xx, yy, mesh_predictions, cmap = light_colors)
    pyplot.scatter(test_data[:, 0], test_data[:, 1], c = test_labels, s = 100, cmap = colors)
    pyplot.title('Test data, accuracy={:.2f}'.format(metrics.accuracy_score(test_labels, estimator.predict(test_data))))

plot_decision_surface(tree.DecisionTreeClassifier(random_state = 1, min_samples_leaf = 3),
train_data, train_labels, test_data, test_labels)

Question 19

Q

create new figure

Answer

A

plt. figure()

plt. show()

Question 20

Q

plot histograms of features

Answer

A

data[real_features].hist(bins=100,figsize=(20, 20))

Question 21

Q

plot heatmap of correlations

Answer

A

seaborn.heatmap(data[real_features].corr(), square=True)

Question 22

Q

plot countplots for features ‘MK1_’, ‘MK_2’, ‘MK_3’ in dataset data with splitting by target variable ‘Response’

Answer

A

fig, axes = plt.subplots(2, 2, figsize=(35, 35), sharey=True)
medical_key = ['MK1_', 'MK_2', 'MK_3']
# One count plot per feature, bars split by the "Response" value.
for i, key in enumerate(medical_key):
    # Integer division: "i / 2" is a float on Python 3 and cannot index axes.
    seaborn.countplot(x=key, data=data, hue="Response", ax=axes[i // 2, i % 2])

Question 23

Q

plot t-SNE representation of features, with different colours for the different classes (values of target variable data.Response)

Answer

A

from sklearn.manifold import TSNE
import matplotlib.cm as cm

model = TSNE(random_state = 321)
tsne_representation = model.fit_transform(data_subset)

# Take the class values from the same series that is used for the masks below,
# so the number of colours always matches the number of plotted classes.
classes = set(response_subset)
colors = cm.rainbow(np.linspace(0, 1, len(classes)))
for y, c in zip(classes, colors):
    # Boolean mask selecting the embedded points that belong to class y.
    mask = response_subset.values == y
    # color= (not c=) so the single RGBA row is read as one colour for all points.
    plt.scatter(tsne_representation[mask, 0], tsne_representation[mask, 1],
                color=c, alpha=0.5, label=str(y))
plt.legend()

Question 24

Q

plot pdf histograms for features ‘F_1’, ‘F_2’, ‘F_3’ of dataset data after dropping their NA values

Answer

A

fig, axes = plt.subplots(1, 3, figsize=(14, 6))
# Pair each feature with its own subplot instead of keeping a manual counter.
for ax, feature in zip(axes, ['F_1', 'F_2', 'F_3']):
    # Missing values are dropped before plotting the distribution.
    seaborn.distplot(data[feature].dropna(), bins=50, ax=ax)

Brainscape's Knowledge Genome™

Visualization Flashcards

Brainscape's Knowledge Genome™