201 - 250 Flashcards
pandas.DataFrame.boxplot(column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, backend=None, **kwargs)
Make a box plot from DataFrame columns.
data[['GrLivArea']].boxplot()
np.random.seed(1234) df = pd.DataFrame(np.random.randn(10, 4), columns=['Col1', 'Col2', 'Col3', 'Col4']) boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])
df = pd.DataFrame(np.random.randn(10, 2), columns=['Col1', 'Col2']) df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B']) boxplot = df.boxplot(by='X')
numpy.nan
NaNs can be used as a poor man’s mask (if you don’t care what the original value was)
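myarr = np.array([1., 0., np.nan, 3.]) np.nonzero(myarr == np.nan) 👉 (array([], dtype=int64),) — NaN never compares equal to anything, including itself; use np.isnan(myarr) 👉 array([False, False, True, False])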
numpy.logical_xor(x1, x2, /, out=None, *, where=True, casting=’same_kind’, order=’K’, dtype=None, subok=True[, signature, extobj])
Compute the truth value of x1 XOR x2, element-wise.
np.logical_xor(True, False) 👉 True
np.logical_xor([True, True, False, False], [True, False, True, False]) 👉 array([False, True, True, False])
arr1 = [8, 2, False, 4] arr2 = [3, 0, False, False] out_arr = np.logical_xor(arr1, arr2) 👉 [False True False True]
sklearn.base.TransformerMixin
Mixin class for all transformers in scikit-learn. Самый простой способ создать собственный трансформатор - это импортировать FunctionTransformer из sklearn.preprocessing.
from sklearn.base import BaseEstimator, TransformerMixin class That(BaseEstimator, TransformerMixin): def __init__(self, this = True): self.this = this def fit(self, X, y=None): return self def transform(self, X, y=None): that = self.this return that
transformer = That(this=False) my_data = transformer.transform(my_data)
scipy.stats.uniform(x : array_like, q : array_like, loc : array_like, optional, scale : array_like, optional)
A uniform continuous random variable. In the standard form, the distribution is uniform on [0, 1].
from scipy.stats import uniform mean, var, skew, kurt = uniform.stats(moments='mvsk') x = np.linspace(uniform.ppf(0.01), uniform.ppf(0.99), 100) ax.plot(x, uniform.pdf(x), 'r-', lw=5, alpha=0.6, label='uniform pdf')
sklearn.datasets.make_blobs(n_samples=100, n_features=2, *, centers=None, cluster_std=1.0, center_box=(-10.0, 10.0), shuffle=True, random_state=None, return_centers=False)
Generate isotropic Gaussian blobs for clustering. Модуль для генерации данных.
from sklearn.datasets import make_blobs X, y = make_blobs(n_samples=10, centers=3, n_features=2, random_state=0) print(X.shape) 👉 (10, 2) y 👉 array([0, 0, 1, 0, 2, 2, 2, 1, 1, 0])
sklearn.compose.make_column_selector(pattern=None, *, dtype_include=None, dtype_exclude=None)
Create a callable to select columns to be used with ColumnTransformer. Can select columns based on datatype or the columns name with a regex.
enumerate(iterable, start=0)
function takes a collection (e.g. a tuple) and returns it as an enumerate object. Покажет порядковые номера элементов.
x = ('apple', 'banana', 'cherry') y = enumerate(x) print(list(y)) 👉 [(0, 'apple'), (1, 'banana'), (2, 'cherry')]
l1 = ["eat", "sleep", "repeat"] s1 = "geek" #changing start index to 2 from 0 print (list(enumerate(s1, 2))) 👉 [(2, 'g'), (3, 'e'), (4, 'e'), (5, 'k')]
iter()
function returns an iterator object. Итератор (iterator) - это объект, который возвращает свои элементы по одному за раз.
x = iter(["apple", "banana", "cherry"]) print(next(x)) 👉 apple print(next(x)) 👉 banana
sklearn.compose.make_column_transformer(*transformers, remainder=’drop’, sparse_threshold=0.3, n_jobs=None, verbose=False, verbose_feature_names_out=True)
Construct a ColumnTransformer from the given transformers.
from sklearn.compose import make_column_transformer make_column_transformer( (StandardScaler(), ['numerical_column']), (OneHotEncoder(), ['categorical_column'])) ColumnTransformer(transformers=[('standardscaler', StandardScaler(...), ['numerical_column']), ('onehotencoder', OneHotEncoder(...), ['categorical_column'])])
sklearn.pipeline.make_union(*transformers, n_jobs=None, verbose=False)
Construct a FeatureUnion from the given transformers.
from sklearn.decomposition import PCA, TruncatedSVD from sklearn.pipeline import make_union make_union(PCA(), TruncatedSVD()) FeatureUnion(transformer_list=[('pca', PCA()), ('truncatedsvd', TruncatedSVD())])
sklearn.feature_selection.mutual_info_regression(X, y, *,discrete_features=’auto’, n_neighbors=3, copy=True, random_state=None)
Оцените взаимную информацию для непрерывной целевой переменной.
Взаимная информация (MI) [1] между двумя случайными переменными является неотрицательным значением, которое измеряет зависимость между переменными.
numpy.random.uniform(low=0.0, high=1.0, size=None)
Помогает нам, получая случайные выборки из равномерного распределения данных. Затем он возвращает случайные выборки в виде массива NumPy.
np.random.uniform(2, 8, (2, 10)) 👉 array([[ 3.1517914 , 3.10313483, 2.84007134, 3.21556436, 4.64531786, 2.99232714, 7.03064897, 4.38691765, 5.27488548, 2.63472454], [ 6.39470358, 5.63084131, 4.69996748, 7.07260546, 7.44340813, 4.10722203, 7.52956646, 4.8596943 , 3.97923973, 5.64505363]])
numpy.random.RandomState(seed=None)
Container for the Mersenne Twister pseudo-random number generator.
matplotlib.pyplot.quiver(*args, data=None, **kwargs)
помогает отображать векторы скорости в виде стрелок с компонентами (u, v) в точках (x, y).
fig, ax = plt.subplots() ax.quiver(x, y, u, v) plt.show()
U = [[1, 1, 1, 1], [-2, -2, -2, -2], [3, 3, 3, 3], [-3, -3, -3, -3]] V = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]] fig, ax = plt.subplots() ax.quiver(U, V) fig.set_figwidth(8) fig.set_figheight(8) plt.show()
matplotlib.pyplot.tight_layout(*, pad=1.08, h_pad=None, w_pad=None, rect=None)
Adjust the padding between and around subplots.
pandas.DataFrame.shift(periods=1, freq=None, axis=0, fill_value=NoDefault.no_default)
Shift index by desired number of periods with an optional time freq.
df = pd.DataFrame({"Col1": [10, 20, 15, 30, 45], "Col2": [13, 23, 18, 33, 48], "Col3": [17, 27, 22, 37, 52]}, index=pd.date_range("2020-01-01", "2020-01-05")) df.shift(periods=3) Col1 Col2 Col3 2020-01-01 NaN NaN NaN 2020-01-02 NaN NaN NaN 2020-01-03 NaN NaN NaN 2020-01-04 10.0 13.0 17.0 2020-01-05 20.0 23.0 27.0
pandas.DataFrame.diff(periods=1, axis=0)
First discrete difference of element. Calculates the difference of a Dataframe element compared with another element in the Dataframe (default is an element in the previous row).
df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], 'b': [1, 1, 2, 3, 5, 8], 'c': [1, 4, 9, 16, 25, 36]}) df.diff() a b c 0 NaN NaN NaN 1 1.0 0.0 3.0 2 1.0 1.0 5.0 3 1.0 1.0 7.0 4 1.0 2.0 9.0 5 1.0 3.0 11.0
numpy.convolve(a, v, mode=’full’)
Возвращает дискретную линейную свертку двух одномерных последовательностей.
a = np.array([0, 1, 2, 3, 2, 1, 0]) b = np.array([1, 2, 2.7, 2.9, 1, 2, 2.7]) np.convolve(a, b) 👉 array([ 0. , 1. , 4. , 9.7, 16.3, 19.9, 20.1, 18.2, 16.3, 13.1, 7.4, 2.7, 0. ])
a = np.array([0, 1, 2, 3, 2, 1, 0]) b = np.array([1, 2, 2.7, 2.9, 1, 2, 2.7]) np.convolve(a, b, mode = 'same') 👉 array([ 9.7, 16.3, 19.9, 20.1, 18.2, 16.3, 13.1])
matplotlib.pyplot.subplot(*args, **kwargs)
Add an Axes to the current figure or retrieve an existing Axes. With this function you can draw multiple plots in one figure.
x = np.array([0, 1, 2, 3]) y = np.array([3, 8, 1, 10]) plt.subplot(1, 2, 1) plt.plot(x,y) x = np.array([0, 1, 2, 3]) y = np.array([10, 20, 30, 40]) plt.subplot(1, 2, 2) plt.plot(x,y) plt.show()