504 - 550 Flashcards
pandas.DataFrame.abs()
method returns a DataFrame with the absolute value of each value, i.e. it removes the minus sign from negative numbers.
data = [[-50, 40, 30], [-1, 2, -2]]
df = pd.DataFrame(data)
print(df.abs())
    0   1   2
0  50  40  30
1   1   2   2
pandas.DataFrame.le(other, axis='columns', level=None)
method compares each value in a DataFrame to check whether it is less than or equal to a specified value (or the corresponding value in another DataFrame) and returns a DataFrame of boolean True/False for each comparison.
df = pd.DataFrame([[10, 12, 2], [3, 4, 7]])
print(df.le(7))
       0      1     2
0  False  False  True
1   True   True  True
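The other argument can also be a Series or another DataFrame of the same shape, in which case the comparison is element-wise; a minimal sketch reusing df from above (the second DataFrame is made up for illustration):
other = pd.DataFrame([[9, 12, 5], [2, 10, 6]])
print(df.le(other))
       0     1      2
0  False  True   True
1  False  True  False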
pandas.DataFrame.items() or iteritems()
method returns an iterator over the columns of the DataFrame, yielding (column label, Series) pairs, so we can iterate over each column. iteritems() is a deprecated alias of items().
data = { "firstname": ["Sally", "Mary", "John"], "age": [50, 40, 30] } df = pd.DataFrame(data) for x, y in df.items(): print(x) print(y) firstname 0 Sally 1 Mary 2 John Name: firstname, dtype: object age 0 50 1 40 2 30 Name: age, dtype: int64
pandas.DataFrame.fillna(value=None, method=None, axis=None, inplace=False, limit=None, downcast=None)
Fill NA/NaN values using the specified method.
df = pd.DataFrame([[np.nan, 2, np.nan, 0],
                   [3, 4, np.nan, 1],
                   [np.nan, np.nan, np.nan, np.nan],
                   [np.nan, 3, np.nan, 4]],
                  columns=list("ABCD"))
df
     A    B   C    D
0  NaN  2.0 NaN  0.0
1  3.0  4.0 NaN  1.0
2  NaN  NaN NaN  NaN
3  NaN  3.0 NaN  4.0
df.fillna(0)
     A    B    C    D
0  0.0  2.0  0.0  0.0
1  3.0  4.0  0.0  1.0
2  0.0  0.0  0.0  0.0
3  0.0  3.0  0.0  4.0
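fillna also accepts a per-column dict of fill values, and the method parameter can propagate the last valid value forward; a sketch reusing the df above (note that method= is deprecated in recent pandas in favour of df.ffill()):
df.fillna(value={"A": 0, "B": 1, "C": 2, "D": 3})
     A    B    C    D
0  0.0  2.0  2.0  0.0
1  3.0  4.0  2.0  1.0
2  0.0  1.0  2.0  3.0
3  0.0  3.0  2.0  4.0
df.fillna(method="ffill")   # forward fill: propagate the last valid value down each column
     A    B   C    D
0  NaN  2.0 NaN  0.0
1  3.0  4.0 NaN  1.0
2  3.0  4.0 NaN  1.0
3  3.0  3.0 NaN  4.0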
pandas.Series.unique
Return unique values of Series object.
pd.Series([2, 1, 3, 3], name='A').unique()
array([2, 1, 3])

pd.Series(pd.Categorical(list('baabc'))).unique()
['b', 'a', 'c']
Categories (3, object): ['a', 'b', 'c']

pd.Series(pd.Categorical(list('baabc'), categories=list('abc'), ordered=True)).unique()
['b', 'a', 'c']
Categories (3, object): ['a' < 'b' < 'c']
pandas.DataFrame.ndim
property returns the number of dimensions of the DataFrame.
df = pd.read_csv('data.csv') print(df.ndim) 👉 2
s = pd.Series({'a': 1, 'b': 2, 'c': 3}) s.ndim 👉 1
df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) df.ndim 👉 2
pandas.Series.dt.strftime(*args, **kwargs)
used to convert to Index using specified date_format.
rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), periods=3, freq='s')
rng.strftime('%B %d, %Y, %r')
Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
       'March 10, 2018, 09:00:02 AM'],
      dtype='object')
result = sr.dt.strftime('%B %d, %Y, %r')
result = sr.dt.strftime('%d %m %Y, %r')
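The sr in the two lines above is assumed to be a Series of datetimes; a minimal sketch with made-up dates (output shown for an English locale):
sr = pd.Series(pd.to_datetime(['2018-03-10 09:00:00', '2018-03-11 10:30:00']))
result = sr.dt.strftime('%B %d, %Y, %r')
print(result)
0    March 10, 2018, 09:00:00 AM
1    March 11, 2018, 10:30:00 AM
dtype: object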
pandas.Series.str.contains(pat, case=True, flags=0, na=None, regex=True)
used to test if pattern or regex is contained within a string of a Series or Index.
s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN])
s1.str.contains('og', regex=False)
0    False
1     True
2    False
3    False
4      NaN

s1.str.contains('oG', case=True, regex=True)
0    False
1    False
2    False
3    False
4      NaN

s2 = pd.Series(['40', '40.0', '41', '41.0', '35'])
s2.str.contains('.0', regex=True)
0     True
1     True
2    False
3     True
4    False
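Note that with regex=True the '.' in '.0' is a regex wildcard matching any character, which is why '40' above counts as a match; with regex=False the pattern is matched literally (a sketch reusing s2):
s2.str.contains('.0', regex=False)
0    False
1     True
2    False
3     True
4    False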
pandas.DataFrame.set_index(keys, drop=True, append=False, inplace=False, verify_integrity=False)
method makes one or more columns the row index of the DataFrame.
df = pd.DataFrame({'month': [1, 4, 7, 10],
                   'year': [2012, 2014, 2013, 2014],
                   'sale': [55, 40, 84, 31]})
df.set_index(['year', 'month'])
            sale
year month
2012 1        55
2014 4        40
2013 7        84
2014 10       31

data = {
  "name": ["Sally", "Mary", "John", "Monica"],
  "age": [50, 40, 30, 40],
  "qualified": [True, False, False, False]
}
df = pd.DataFrame(data)
newdf = df.set_index('name')
print(newdf)
        age  qualified
name
Sally    50       True
Mary     40      False
John     30      False
Monica   40      False
pandas.DataFrame.index
property returns the index information of the DataFrame. The index information contains the labels of the rows. If the rows do not have named indexes, the index property returns a RangeIndex object with the start, stop, and step values.
df = pd.read_csv('data.csv')
print(df.index)
RangeIndex(start=0, stop=169, step=1)
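If the rows do have named labels, the property returns those labels as an Index instead; a minimal sketch with made-up data:
df = pd.DataFrame({"age": [50, 40]}, index=["Sally", "Mary"])
print(df.index)
Index(['Sally', 'Mary'], dtype='object')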
pandas.DataFrame.reset_index(level=None, drop=False, inplace=False, col_level=0, col_fill='')
method allows you to reset the index back to the default 0, 1, 2, etc. index. By default this method keeps the "old" indexes in a column named "index"; to avoid this, use the drop parameter (shown below).
data = { "name": ["Sally", "Mary", "John"], "age": [50, 40, 30], "qualified": [True, False, False] } idx = ["X", "Y", "Z"] df = pd.DataFrame(data, index=idx) newdf = df.reset_index() print(newdf) index name age qualified 0 X Sally 50 True 1 Y Mary 40 False 2 Z John 30 False
df = pd.DataFrame([('bird', 389.0), ('bird', 24.0), ('mammal', 80.5), ('mammal', np.nan)], index=['falcon', 'parrot', 'lion', 'monkey'], columns=('class', 'max_speed')) df.reset_index() index class max_speed 0 falcon bird 389.0 1 parrot bird 24.0 2 lion mammal 80.5 3 monkey mammal NaN
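With drop=True the old index is discarded rather than kept as an 'index' column; reusing the df above:
df.reset_index(drop=True)
    class  max_speed
0    bird      389.0
1    bird       24.0
2  mammal       80.5
3  mammal        NaN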
pandas.DataFrame.sort_index(axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None)
method sorts the DataFrame by the index.
df = pd.DataFrame([1, 2, 3, 4, 5], index=[100, 29, 234, 1, 150], columns=['A'])
df.sort_index()
     A
1    4
29   2
100  1
150  5
234  3

df = pd.DataFrame({"a": [1, 2, 3, 4]}, index=['A', 'b', 'C', 'd'])
df.sort_index(key=lambda x: x.str.lower())
   a
A  1
b  2
C  3
d  4

data = {
  "age": [50, 40, 30, 40, 20, 10, 30],
  "qualified": [True, False, False, False, False, True, True]
}
idx = ["Mary", "Sally", "Emil", "Tobias", "Linus", "John", "Peter"]
df = pd.DataFrame(data, index=idx)
newdf = df.sort_index()
print(newdf)
        age  qualified
Emil     30      False
John     10       True
Linus    20      False
Mary     50       True
Peter    30       True
Sally    40      False
Tobias   40      False
pandas.DataFrame.size
property returns the number of elements in the DataFrame. The number of elements is the number of rows * the number of columns.
In our example the DataFrame has 169 rows and 4 columns: 169 * 4 = 676
df = pd.read_csv('data.csv') print(df.size) 👉 676
s = pd.Series({'a': 1, 'b': 2, 'c': 3}) s.size 👉 3
df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) df.size 👉 4
pandas.Series.is_unique
Returns True if the values in the given Series object are unique (i.e. there are no duplicates); otherwise it returns False.
sr = pd.Series(['New York', 'Chicago', 'Toronto', 'Lisbon', 'Chicago'])
# Creating the row axis labels
sr.index = ['City 1', 'City 2', 'City 3', 'City 4', 'City 5']
sr.is_unique 👉 False
pandas.Series.is_monotonic
Returns True if the values in the given Series object are monotonically increasing; otherwise it returns False. (Newer pandas versions use is_monotonic_increasing instead.)
sr = pd.Series(['New York', 'Chicago', 'Toronto', 'Lisbon'])
sr.index = ['City 1', 'City 2', 'City 3', 'City 4']
sr.is_monotonic 👉 False

sr = pd.Series(['1/1/2018', '2/1/2018', '3/1/2018', '4/1/2018'])
sr.index = ['Day 1', 'Day 2', 'Day 3', 'Day 4']
sr.is_monotonic 👉 True  # these are strings, and they happen to be in increasing lexicographic order
pandas.DataFrame.product(axis=None, skipna=True, level=None, numeric_only=None, min_count=0, **kwargs)
method multiplies all values in each column and returns the product for each column. By specifying the column axis (axis='columns'), the product() method works row-wise instead and returns the product of each row (see the sketch below). The product() method does the same as the prod() method.
data = [[10, 18, 11], [13, 15, 8], [9, 20, 3]]
df = pd.DataFrame(data)
print(df.product())
0    1170
1    5400
2     264
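The row-wise variant mentioned above, reusing the same df (axis='columns' is equivalent to axis=1):
print(df.product(axis='columns'))
0    1980
1    1560
2     540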
seaborn.boxplot(*, x=None, y=None, hue=None, data=None, order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, width=0.8, dodge=True, fliersize=5, linewidth=None, whis=1.5, ax=None, **kwargs)
Draw a box plot to show distributions with respect to categories. A box plot (or box-and-whisker plot) shows the distribution of quantitative data in a way that facilitates comparisons between variables or across levels of a categorical variable.
sns.set_theme(style="whitegrid") tips = sns.load_dataset("tips") ax = sns.boxplot(x=tips["total_bill"])
ax = sns.boxplot(x="day", y="total_bill", hue="smoker", data=tips, palette="Set3")
sns.set_style("whitegrid") sns.boxplot(x = 'day', y = 'total_bill', data = tips)
pandas.read_csv(filepath_or_buffer, sep=NoDefault.no_default, delimiter=None, header='infer', names=NoDefault.no_default, index_col=None, usecols=None, squeeze=None, prefix=NoDefault.no_default, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=None, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, cache_dates=True, iterator=False, chunksize=None, compression='infer', thousands=None, decimal='.', lineterminator=None, quotechar='"', quoting=0,
doublequote=True, escapechar=None, comment=None, encoding=None, encoding_errors='strict', dialect=None, error_bad_lines=None, warn_bad_lines=None, on_bad_lines=None, delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None, storage_options=None)
Read a comma-separated values (csv) file into DataFrame. Also supports optionally iterating or breaking of the file into chunks.
filepath_or_buffer - Any valid string path is acceptable. The string could be a URL.
sep - Delimiter to use.
header - Row number(s) to use as the column names, and the start of the data.
names - List of column names to use.
index_col - Column(s) to use as the row labels of the DataFrame, either given as string name or column index. If a sequence of int / str is given, a MultiIndex is used.
usecols - Return a subset of the columns. If list-like, all elements must either be positional (i.e. integer indices into the document columns) or strings that correspond to column names.
mangle_dupe_cols - Duplicate columns will be specified as 'X', 'X.1', … 'X.N', rather than 'X'…'X'.
dtype - Data type for data or columns.
converters - Dict of functions for converting values in certain columns. Keys can either be integers or column labels.
skipinitialspace - Skip spaces after delimiter.
skiprows - Line numbers to skip (0-indexed) or number of lines to skip (int) at the start of the file.
skipfooter - Number of lines at the bottom of the file to skip (unsupported with engine='c').
nrows - Number of rows of file to read. Useful for reading pieces of large files.
na_values - Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values.
na_filter - Detect missing value markers (empty strings and the value of na_values). In data without any NAs, passing na_filter=False can improve the performance of reading a large file.
skip_blank_lines - If True, skip over blank lines rather than interpreting as NaN values.
parse_dates - Columns to parse as dates; bool or list of int or names or list of lists or dict, default False.
infer_datetime_format - If True and parse_dates is enabled, pandas will attempt to infer the format of the datetime strings in the columns, and if it can be inferred, switch to a faster method of parsing them.
keep_date_col - If True and parse_dates specifies combining multiple columns, then keep the original columns.
date_parser - Function to use for converting a sequence of string columns to an array of datetime instances.
dayfirst - DD/MM format dates, international and European format.
lineterminator - Character to break file into lines. Only valid with C parser.
escapechar - One-character string used to escape other characters.
comment - Indicates remainder of line should not be parsed. If found at the beginning of a line, the line will be ignored altogether. This parameter must be a single character.
encoding - Encoding to use for UTF when reading/writing (ex. 'utf-8').
dialect - If provided, this parameter will override values (default or not) for the following parameters: delimiter, doublequote, escapechar, skipinitialspace, quotechar, and quoting.
on_bad_lines - {'error', 'warn', 'skip'}. Specifies what to do upon encountering a bad line (a line with too many fields).
delim_whitespace - Specifies whether or not whitespace (e.g. ' ' or '\t') will be used as the sep.
pd.read_csv('data.csv')
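A sketch combining several of the parameters above; the file name and column names are made up for illustration:
df = pd.read_csv('data.csv',
                 sep=',',
                 header=0,
                 index_col='Date',
                 usecols=['Date', 'Duration', 'Calories'],
                 parse_dates=['Date'],
                 nrows=100,
                 na_values=['n/a', '--'])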
pandas.Series.to_dict(into=<class 'dict'>)
Convert Series to {label -> value} dict or dict-like object.
s = pd.Series([1, 2, 3, 4]) s.to_dict() 👉 {0: 1, 1: 2, 2: 3, 3: 4}
pandas.DataFrame.to_dict(orient='dict', into=<class 'dict'>)
Convert the DataFrame to a dictionary. The type of the key-value pairs can be customized with the parameters (see below).
orient - {'dict', 'list', 'series', 'split', 'records', 'index'}
df = pd.DataFrame({'col1': [1, 2], 'col2': [0.5, 0.75]}, index=['row1', 'row2'])
df.to_dict()
{'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}}
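The orient parameter changes the shape of the result; two common variants, reusing the df above:
df.to_dict('list')
{'col1': [1, 2], 'col2': [0.5, 0.75]}
df.to_dict('records')
[{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]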