mirror of
https://github.com/m-lamonaca/dev-notes.git
synced 2025-06-08 10:47:13 +00:00
Fix typos
This commit is contained in:
parent
76550dfa3c
commit
5c0799df7f
118 changed files with 1150 additions and 1602 deletions
|
@ -16,7 +16,7 @@ Unless explicitly specified `np.array` tries to infer a good data type for the a
|
|||
The data type is stored in a special dtype object.
|
||||
|
||||
```py
|
||||
var = np.array(sequence) # createa array
|
||||
var = np.array(sequence) # creates array
|
||||
var = np.asarray(sequence) # convert input to array
|
||||
var = np.ndarray(*sequence) # creates multidimensional array
|
||||
var = np.asanyarray(*sequence) # convert the input to an ndarray
|
||||
|
@ -53,12 +53,12 @@ The numerical `dtypes` are named the same way: a type name followed by a number
|
|||
| complex64, complex128, complex256 | c8, c16, c32 | Complex numbers represented by two 32-, 64-, or 128-bit floats, respectively |
|
||||
| bool | ? | Boolean type storing True and False values |
|
||||
| object | O | Python object type |
|
||||
| string_ | `S<num>` | Fixed-length string type (1 byte per character), `<num>` is string lenght |
|
||||
| unicode_ | `U<num>` | Fixed-length unicode type, `<num>` is lenght |
|
||||
| string_ | `S<num>` | Fixed-length string type (1 byte per character), `<num>` is string length |
|
||||
| unicode_ | `U<num>` | Fixed-length unicode type, `<num>` is length |
|
||||
|
||||
## OPERATIONS BETWEEN ARRAYS AND SCALARS
|
||||
|
||||
Any arithmetic operations between equal-size arrays applies the operation elementwise.
|
||||
Any arithmetic operations between equal-size arrays applies the operation element-wise.
|
||||
|
||||
array `+` scalar --> element-wise addition (`[1, 2, 3] + 2 = [3, 4, 5]`)
|
||||
array `-` scalar --> element-wise subtraction (`[1 , 2, 3] - 2 = [-2, 0, 1]`)
|
||||
|
@ -73,7 +73,7 @@ array_1 `/` array_2 --> element-wise division (`[1, 2, 3] / [3, 2, 1] = [0.33, 1
|
|||
## SHAPE MANIPULATION
|
||||
|
||||
```py
|
||||
np.reshape(array, newshape) # changes the shape of the array
|
||||
np.reshape(array, new_shape) # changes the shape of the array
|
||||
np.ravel(array) # returns the array flattened
|
||||
array.resize(shape) # modifies the array itself
|
||||
array.T # returns the array transposed
|
||||
|
@ -87,7 +87,7 @@ np.swapaxes(array, first_axis, second_axis) # interchange two axes of an array
|
|||
```py
|
||||
np.vstack((array1, array2)) # takes tuple, vertical stack of arrays (column wise)
|
||||
np.hstack((array1, array2)) # takes a tuple, horizontal stack of arrays (row wise)
|
||||
np.dstack((array1, array2)) # takes a tuple, depth wise stack of arrays (3rd dimesion)
|
||||
np.dstack((array1, array2)) # takes a tuple, depth wise stack of arrays (3rd dimension)
|
||||
np.stack(*arrays, axis) # joins a sequence of arrays along a new axis (axis is an int)
|
||||
np.concatenate((array1, array2, ...), axis) # joins a sequence of arrays along an existing axis (axis is an int)
|
||||
```
|
||||
|
@ -95,32 +95,32 @@ np.concatenate((array1, array2, ...), axis) # joins a sequence of arrays along a
|
|||
## SPLITTING ARRAYS
|
||||
|
||||
```py
|
||||
np.split(array, indices) # splits an array into equalli long sub-arrays (indices is int), if not possible raises error
|
||||
np.split(array, indices) # splits an array into equally long sub-arrays (indices is int), if not possible raises error
|
||||
np.vsplit(array, indices) # splits an array equally into sub-arrays vertically (row wise) if not possible raises error
|
||||
np.hsplit(array, indices) # splits an array equally into sub-arrays horizontally (column wise) if not possible raises error
|
||||
np.dsplit(array, indices) # splits an array equally into sub-arrays along the 3rd axis (depth) if not possible raises error
|
||||
np.array_split(array, indices) # splits an array into sub-arrays, arrays can be of different lenghts
|
||||
np.array_split(array, indices) # splits an array into sub-arrays, arrays can be of different lengths
|
||||
```
|
||||
|
||||
## VIEW()
|
||||
|
||||
```py
|
||||
var = array.view() # creates a new array that looks at the same data
|
||||
# slicinga returnas a view
|
||||
# view shapes are separated but assignement changes all arrays
|
||||
# slicing returns a view
|
||||
# view shapes are separated but assignment changes all arrays
|
||||
```
|
||||
|
||||
## COPY()
|
||||
|
||||
```py
|
||||
var = array.copy() # creates a deepcopy of the array
|
||||
var = array.copy() # creates a deep copy of the array
|
||||
```
|
||||
|
||||
## INDEXING, SLICING, ITERATING
|
||||
|
||||
1-dimensional --> sliced, iterated and indexed as standard
|
||||
n-dimensinal --> one index per axis, index given in tuple separated by commas `[i, j] (i, j)`
|
||||
dots (`...`) represent as meny colons as needed to produce complete indexing tuple
|
||||
n-dimensional --> one index per axis, index given in tuple separated by commas `[i, j] (i, j)`
|
||||
dots (`...`) represent as many colons as needed to produce complete indexing tuple
|
||||
|
||||
- `x[1, 2, ...] == [1, 2, :, :, :]`
|
||||
- `x[..., 3] == [:, :, :, :, 3]`
|
||||
|
@ -134,7 +134,7 @@ iteration on first index, use .flat() to iterate over each element
|
|||
|
||||
## UNIVERSAL FUNCTIONS (ufunc)
|
||||
|
||||
Functions that performs elemen-wise operations (vectorization).
|
||||
Functions that perform element-wise operations (vectorization).
|
||||
|
||||
```py
|
||||
np.abs(array) # vectorized abs(), return element absolute value
|
||||
|
@ -151,7 +151,7 @@ np.ceil(array) # vectorized ceil()
|
|||
np.floor(array) # vectorized floor()
|
||||
np.rint(array) # vectorized round() to nearest int
|
||||
np.modf(array) # vectorized divmod(), returns the fractional and integral parts of element
|
||||
np.isnan(array) # vectorized x == NaN, return bollean array
|
||||
np.isnan(array) # vectorized x == NaN, return boolean array
|
||||
np.isinf(array) # vectorized test for positive or negative infinity, return boolean array
|
||||
np.isfinite(array) # vectorized test for finiteness, returns boolean array
|
||||
np.cos(array) # vectorized cos(x)
|
||||
|
@ -163,7 +163,7 @@ np.tanh(array) # vectorized tanh(x)
|
|||
np.arccos(array) # vectorized arccos(x)
|
||||
np.arcsinh(array) # vectorized arcsinh(x)
|
||||
np.arctan(array) # vectorized arctan(x)
|
||||
np.arccosh(array) # vectorized arccos(x)
|
||||
np.arccosh(array) # vectorized arccosh(x)
|
||||
np.arcsinh(array) # vectorized arcsinh(x)
|
||||
np.arctanh(array) # vectorized arctanh(x)
|
||||
np.logical_not(array) # vectorized not(x), equivalent to ~array for boolean arrays
|
||||
|
@ -246,13 +246,13 @@ np.setxor1d() # Set symmetric differences; elements that are in either of the a
|
|||
## FILE I/O WITH ARRAYS
|
||||
|
||||
```py
|
||||
np.save(file, array) # save array to binary file in .npy fromat
|
||||
np.savez(file, *array) # saveseveral arrays into a single file in uncompressed .npz format
|
||||
np.save(file, array) # save array to binary file in .npy format
|
||||
np.savez(file, *array) # save several arrays into a single file in uncompressed .npz format
|
||||
np.savez_compressed(file, *args, **kwargs) # save several arrays into a single file in compressed .npz format
|
||||
# *ARGS: arrays to save to the file. arrays will be saved with names “arr_0”, “arr_1”, and so on
|
||||
# *ARGS: arrays to save to the file. arrays will be saved with names "arr_0", "arr_1", and so on
|
||||
# **KWARGS: arrays to save to the file. arrays will be saved in the file with the keyword names
|
||||
|
||||
np.savetxt(file, X, fmt="%.18e", delimiter=" ") # save arry to text file
|
||||
np.savetxt(file, X, fmt="%.18e", delimiter=" ") # save array to text file
|
||||
# X: 1D or 2D
|
||||
# FMT: Python Format Specification Mini-Language
|
||||
# DELIMITER: {str} -- string used to separate values
|
||||
|
@ -272,14 +272,14 @@ np.diag(array, k=0) # extract a diagonal or construct a diagonal array
|
|||
|
||||
np.dot(x ,y) # matrix dot product
|
||||
np.trace(array, offset=0, dtype=None, out=None) # return the sum along diagonals of the array
|
||||
# OFFSET: {int} -- offest of the diagonal from the main diagonal
|
||||
# OFFSET: {int} -- offset of the diagonal from the main diagonal
|
||||
# dtype: {dtype} -- determines the data-type of the returned array
|
||||
# OUT: {ndarray} -- array into which the output is placed
|
||||
|
||||
np.linalg.det(A) # compute the determinant of an array
|
||||
np.linalg.eig(A) # compute the eigenvalues and right eigenvectors of a square array
|
||||
np.linalg.inv(A) # compute the (multiplicative) inverse of a matrix
|
||||
# Ainv satisfies dot(A, Ainv) = dor(Ainv, A) = eye(A.shape[0])
|
||||
# A_inv satisfies dot(A, A_inv) = dot(A_inv, A) = eye(A.shape[0])
|
||||
|
||||
np.linalg.pinv(A) # compute the (Moore-Penrose) pseudo-inverse of a matrix
|
||||
np.linalg.qr() # factor the matrix a as qr, where q is orthonormal and r is upper-triangular
|
||||
|
@ -304,13 +304,13 @@ np.random.Generator.beta(a, b, size=None) # draw samples from a Beta distributi
|
|||
|
||||
np.random.Generator.binomial(n, p, size=None) # draw samples from a binomial distribution
|
||||
# N: {int, array ints} -- parameter of the distribution, >= 0
|
||||
# P: {float, attay floats} -- Parameter of the distribution, >= 0 and <= 1
|
||||
# P: {float, array floats} -- Parameter of the distribution, >= 0 and <= 1
|
||||
|
||||
np.random.Generator.chisquare(df, size=None)
|
||||
# DF: {float, array floats} -- degrees of freedom, > 0
|
||||
|
||||
np.random.Generator.gamma(shape, scale=1.0, size=None) # draw samples from a Gamma distribution
|
||||
# SHAPE: {flaot, array floats} -- shape of the gamma distribution, != 0
|
||||
# SHAPE: {float, array floats} -- shape of the gamma distribution, != 0
|
||||
|
||||
np.random.Generator.normal(loc=0.0, scale=1.0, size=None) # draw random samples from a normal (Gaussian) distribution
|
||||
# LOC: {float, array floats} -- mean ("centre") of distribution
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Pandas Lib
|
||||
# Pandas
|
||||
|
||||
## Basic Pandas Imports
|
||||
|
||||
|
@ -25,8 +25,8 @@ s = Series(dict) # Series created from python dict, dict keys become index valu
|
|||
```py
|
||||
s['index'] # selection by index label
|
||||
s[condition] # return slice selected by condition
|
||||
s[ : ] # slice endpoin included
|
||||
s[ : ] = *value # modifi value of entire slice
|
||||
s[ : ] # slice endpoint included
|
||||
s[ : ] = *value # modify value of entire slice
|
||||
s[condition] = *value # modify slice by condition
|
||||
```
|
||||
|
||||
|
@ -35,8 +35,8 @@ s[condition] = *value # modify slice by condition
|
|||
Missing data appears as NaN (Not a Number).
|
||||
|
||||
```py
|
||||
pd.isnull(array) # retunn a Series index-bool indicating wich indexes dont have data
|
||||
pd.notnull(array) # retunn a Series index-bool indicating wich indexes have data
|
||||
pd.isnull(array) # return a Series index-bool indicating which indexes don't have data
|
||||
pd.notnull(array) # return a Series index-bool indicating which indexes have data
|
||||
array.isnull()
|
||||
array.notnull()
|
||||
```
|
||||
|
@ -53,18 +53,18 @@ s.index.name = "index name" # renames index
|
|||
### SERIES METHODS
|
||||
|
||||
```py
|
||||
pd.Series.isin(self, values) # boolean Series showing whether elements in Series matcheselements in values exactly
|
||||
pd.Series.isin(self, values) # boolean Series showing whether elements in Series match elements in values exactly
|
||||
|
||||
# Conform Series to new index, new object produced unless the new index is equivalent to current one and copy=False
|
||||
pd.Series.reindex(delf, index=None, **kwargs)
|
||||
pd.Series.reindex(self, index=None, **kwargs)
|
||||
# INDEX: {array} -- new labels / index
|
||||
# METHOD: {none (dont fill gaps), pad (fill or carry values forward), backfill (fill or carry values backward)}-- hole filling method
|
||||
# METHOD: {none (don't fill gaps), pad (fill or carry values forward), backfill (fill or carry values backward)}-- hole filling method
|
||||
# COPY: {bool} -- return new object even if index is same -- DEFAULT True
|
||||
# FILLVALUE: {scalar} --value to use for missing values. DEFAULT NaN
|
||||
|
||||
pd.Series.drop(self, index=None, **kwargs) # return Series with specified index labels removed
|
||||
# INPLACE: {bool} -- if true do operation in place and return None -- DEFAULT False
|
||||
# ERRORS: {ignore, raise} -- If ‘ignore’, suppress error and existing labels are dropped
|
||||
# ERRORS: {ignore, raise} -- If "ignore", suppress error and existing labels are dropped
|
||||
# KeyError raised if not all of the labels are found in the selected axis
|
||||
|
||||
pd.Series.value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True)
|
||||
|
@ -72,7 +72,7 @@ pd.Series.value_counts(self, normalize=False, sort=True, ascending=False, bins=N
|
|||
# SORT: {bool} -- sort by frequency -- DEFAULT True
|
||||
# ASCENDING: {bool} -- sort in ascending order -- DEFAULT False
|
||||
# BINS: {int} -- group values into half-open bins, only works with numeric data
|
||||
# DROPNA: {bool} -- dont include counts of NaN
|
||||
# DROPNA: {bool} -- don't include counts of NaN
|
||||
```
|
||||
|
||||
## DATAFRAME
|
||||
|
@ -124,19 +124,19 @@ df.T # transpose
|
|||
### DATAFRAME METHODS
|
||||
|
||||
```py
|
||||
pd.DataFrame.isin(self , values) # boolean DataFrame showing whether elements in DataFrame matcheselements in values exactly
|
||||
pd.DataFrame.isin(self , values) # boolean DataFrame showing whether elements in DataFrame match elements in values exactly
|
||||
|
||||
# Conform DataFrame to new index, new object produced unless the new index is equivalent to current one and copy=False
|
||||
pd.DataFrame.reindex(self, index=None, columns=None, **kwargs)
|
||||
# INDEX: {array} -- new labels / index
|
||||
# COLUMNS: {array} -- new labels / columns
|
||||
# METHOD: {none (dont fill gaps), pad (fill or carry values forward), backfill (fill or carry values backward)}-- hole filling method
|
||||
# METHOD: {none (don't fill gaps), pad (fill or carry values forward), backfill (fill or carry values backward)}-- hole filling method
|
||||
# COPY: {bool} -- return new object even if index is same -- DEFAULT True
|
||||
# FILLVALUE: {scalar} --value to use for missing values. DEFAULT NaN
|
||||
|
||||
pd.DataFrame.drop(self, index=None, columns=None, **kwargs) # Remove rows or columns by specifying label names
|
||||
# INPLACE: {bool} -- if true do operation in place and return None -- DEFAULT False
|
||||
# ERRORS: {ignore, raise} -- If ‘ignore’, suppress error and existing labels are dropped
|
||||
# ERRORS: {ignore, raise} -- If "ignore", suppress error and existing labels are dropped
|
||||
# KeyError raised if not all of the labels are found in the selected axis
|
||||
```
|
||||
|
||||
|
@ -147,7 +147,7 @@ Holds axis labels and metadata, immutable.
|
|||
### INDEX TYPES
|
||||
|
||||
```py
|
||||
pd.Index # immutable ordered ndarray, sliceable. stortes axis labels
|
||||
pd.Index # immutable ordered ndarray, sliceable. stores axis labels
|
||||
pd.Int64Index # special case of Index with purely integer labels
|
||||
pd.MultiIndex # multi-level (hierarchical) index object for pandas objects
|
||||
pd.PeriodIndex # immutable ndarray holding ordinal values indicating regular periods in time
|
||||
|
@ -169,27 +169,27 @@ pd.Index.hasnans # Return True if the index has NaNs
|
|||
pd.Index.append(self, other) # append a collection of Index options together
|
||||
|
||||
pd.Index.difference(self, other, sort=None) # set difference of two Index objects
|
||||
# SORT: {None (attempt sorting), False (dont sort)}
|
||||
# SORT: {None (attempt sorting), False (don't sort)}
|
||||
|
||||
pd.Index.intersection(self, other, sort=None) # set intersection of two Index objects
|
||||
# SORT: {None (attempt sorting), False (dont sort)}
|
||||
# SORT: {None (attempt sorting), False (don't sort)}
|
||||
|
||||
pd.Index.union(self, other, sort=None) # set union of two Index objects
|
||||
# SORT: {None (attempt sorting), False (dont sort)}
|
||||
# SORT: {None (attempt sorting), False (don't sort)}
|
||||
|
||||
pd.Index.isin(self, values, level=None) # boolean array indicating where the index values are in values
|
||||
pd.Index.insert(self, loc, item) # make new Index inserting new item at location
|
||||
pd.Index.delete(self, loc) # make new Index with passed location(-s) deleted
|
||||
|
||||
pd.Index.drop(self, labels, errors='raise') # Make new Index with passed list of labels deleted
|
||||
# ERRORS: {ignore, raise} -- If ‘ignore’, suppress error and existing labels are dropped
|
||||
# ERRORS: {ignore, raise} -- If 'ignore', suppress error and existing labels are dropped
|
||||
# KeyError raised if not all of the labels are found in the selected axis
|
||||
|
||||
pd.Index.reindex(self, target, **kwargs) # create index with target’s values (move/add/delete values as necessary)
|
||||
# METHOD: {none (dont fill gaps), pad (fill or carry values forward), backfill (fill or carry values backward)}-- hole filling method
|
||||
pd.Index.reindex(self, target, **kwargs) # create index with target's values (move/add/delete values as necessary)
|
||||
# METHOD: {none (don't fill gaps), pad (fill or carry values forward), backfill (fill or carry values backward)}-- hole filling method
|
||||
```
|
||||
|
||||
## ARITMETHIC OPERATIONS
|
||||
## ARITHMETIC OPERATIONS
|
||||
|
||||
NumPy array operations preserve the label-value link.
|
||||
Arithmetic operations automatically align differently indexed data.
|
||||
|
@ -199,10 +199,10 @@ Missing values propagate in arithmetic computations (NaN `<operator>` value = Na
|
|||
|
||||
```py
|
||||
self + other
|
||||
pd.Series.add(self, other, fill_value=None) # add(), supports substituion of NaNs
|
||||
pd,Series.radd(self, other, fill_value=None) # radd(), supports substituion of NaNs
|
||||
pd.DataFrame.add(self, other, axis=columns, fill_value=None) # add(), supports substituion of NaNs
|
||||
pd.DataFrame.radd(self, other, axis=columns, fill_value=None) # radd(), supports substituion of NaNs
|
||||
pd.Series.add(self, other, fill_value=None) # add(), supports substitution of NaNs
|
||||
pd.Series.radd(self, other, fill_value=None) # radd(), supports substitution of NaNs
|
||||
pd.DataFrame.add(self, other, axis=columns, fill_value=None) # add(), supports substitution of NaNs
|
||||
pd.DataFrame.radd(self, other, axis=columns, fill_value=None) # radd(), supports substitution of NaNs
|
||||
# OTHER: {scalar, sequence, Series, DataFrame}
|
||||
# AXIS: {0, 1, index, columns} -- whether to compare by the index or columns
|
||||
# FILLVALUE: {None, float} -- fill missing value
|
||||
|
@ -212,10 +212,10 @@ pd.DataFrame.radd(self, other, axis=columns, fill_value=None) # radd(), support
|
|||
|
||||
```py
|
||||
self - other
|
||||
pd.Series.sub(self, other, fill_value=None) # sub(), supports substituion of NaNs
|
||||
pd.Series.radd(self, other, fill_value=None) # radd(), supports substituion of NaNs
|
||||
ps.DataFrame.sub(self, other, axis=columns, fill_value=None) # sub(), supports substituion of NaNs
|
||||
pd.DataFrame.rsub(self, other, axis=columns, fill_value=None) # rsub(), supports substituion of NaNs
|
||||
pd.Series.sub(self, other, fill_value=None) # sub(), supports substitution of NaNs
|
||||
pd.Series.rsub(self, other, fill_value=None) # rsub(), supports substitution of NaNs
|
||||
pd.DataFrame.sub(self, other, axis=columns, fill_value=None) # sub(), supports substitution of NaNs
|
||||
pd.DataFrame.rsub(self, other, axis=columns, fill_value=None) # rsub(), supports substitution of NaNs
|
||||
# OTHER: {scalar, sequence, Series, DataFrame}
|
||||
# AXIS: {0, 1, index, columns} -- whether to compare by the index or columns
|
||||
# FILLVALUE: {None, float} -- fill missing value
|
||||
|
@ -225,10 +225,10 @@ pd.DataFrame.rsub(self, other, axis=columns, fill_value=None) # rsub(), support
|
|||
|
||||
```py
|
||||
self * other
|
||||
pd.Series.mul(self, other, fill_value=None) # mul(), supports substituion of NaNs
|
||||
pd.Series.rmul(self, other, fill_value=None) # rmul(), supports substituion of NaNs
|
||||
ps.DataFrame.mul(self, other, axis=columns, fill_value=None) # mul(), supports substituion of NaNs
|
||||
pd.DataFrame.rmul(self, other, axis=columns, fill_value=None) # rmul(), supports substituion of NaNs
|
||||
pd.Series.mul(self, other, fill_value=None) # mul(), supports substitution of NaNs
|
||||
pd.Series.rmul(self, other, fill_value=None) # rmul(), supports substitution of NaNs
|
||||
pd.DataFrame.mul(self, other, axis=columns, fill_value=None) # mul(), supports substitution of NaNs
|
||||
pd.DataFrame.rmul(self, other, axis=columns, fill_value=None) # rmul(), supports substitution of NaNs
|
||||
# OTHER: {scalar, sequence, Series, DataFrame}
|
||||
# AXIS: {0, 1, index, columns} -- whether to compare by the index or columns
|
||||
# FILLVALUE: {None, float} -- fill missing value
|
||||
|
@ -238,14 +238,14 @@ pd.DataFrame.rmul(self, other, axis=columns, fill_value=None) # rmul(), support
|
|||
|
||||
```py
|
||||
self / other
|
||||
pd.Series.div(self, other, fill_value=None) # div(), supports substituion of NaNs
|
||||
pd.Series.rdiv(self, other, fill_value=None) # rdiv(), supports substituion of NaNs
|
||||
pd.Series.truediv(self, other, fill_value=None) # truediv(), supports substituion of NaNs
|
||||
pd.Series.rtruediv(self, other, fill_value=None) # rtruediv(), supports substituion of NaNs
|
||||
ps.DataFrame.div(self, other, axis=columns, fill_value=None) # div(), supports substituion of NaNs
|
||||
pd.DataFrame.rdiv(self, other, axis=columns, fill_value=None) # rdiv(), supports substituion of NaNs
|
||||
ps.DataFrame.truediv(self, other, axis=columns, fill_value=None) # truediv(), supports substituion of NaNs
|
||||
pd.DataFrame.rtruediv(self, other, axis=columns, fill_value=None) # rtruediv(), supports substituion of NaNs
|
||||
pd.Series.div(self, other, fill_value=None) # div(), supports substitution of NaNs
|
||||
pd.Series.rdiv(self, other, fill_value=None) # rdiv(), supports substitution of NaNs
|
||||
pd.Series.truediv(self, other, fill_value=None) # truediv(), supports substitution of NaNs
|
||||
pd.Series.rtruediv(self, other, fill_value=None) # rtruediv(), supports substitution of NaNs
|
||||
pd.DataFrame.div(self, other, axis=columns, fill_value=None) # div(), supports substitution of NaNs
|
||||
pd.DataFrame.rdiv(self, other, axis=columns, fill_value=None) # rdiv(), supports substitution of NaNs
|
||||
pd.DataFrame.truediv(self, other, axis=columns, fill_value=None) # truediv(), supports substitution of NaNs
|
||||
pd.DataFrame.rtruediv(self, other, axis=columns, fill_value=None) # rtruediv(), supports substitution of NaNs
|
||||
# OTHER: {scalar, sequence, Series, DataFrame}
|
||||
# AXIS: {0, 1, index, columns} -- whether to compare by the index or columns
|
||||
# FILLVALUE: {None, float} -- fill missing value
|
||||
|
@ -255,10 +255,10 @@ pd.DataFrame.rtruediv(self, other, axis=columns, fill_value=None) # rtruediv(),
|
|||
|
||||
```py
|
||||
self // other
|
||||
pd.Series.floordiv(self, other, fill_value=None) # floordiv(), supports substituion of NaNs
|
||||
pd.Series.rfloordiv(self, other, fill_value=None) # rfloordiv(), supports substituion of NaNs
|
||||
ps.DataFrame.floordiv(self, other, axis=columns, fill_value=None) # floordiv(), supports substituion of NaNs
|
||||
pd.DataFrame.rfloordiv(self, other, axis=columns, fill_value=None) # rfloordiv(), supports substituion of NaNs
|
||||
pd.Series.floordiv(self, other, fill_value=None) # floordiv(), supports substitution of NaNs
|
||||
pd.Series.rfloordiv(self, other, fill_value=None) # rfloordiv(), supports substitution of NaNs
|
||||
pd.DataFrame.floordiv(self, other, axis=columns, fill_value=None) # floordiv(), supports substitution of NaNs
|
||||
pd.DataFrame.rfloordiv(self, other, axis=columns, fill_value=None) # rfloordiv(), supports substitution of NaNs
|
||||
# OTHER: {scalar, sequence, Series, DataFrame}
|
||||
# AXIS: {0, 1, index, columns} -- whether to compare by the index or columns
|
||||
# FILLVALUE: {None, float} -- fill missing value
|
||||
|
@ -268,10 +268,10 @@ pd.DataFrame.rfloordiv(self, other, axis=columns, fill_value=None) # rfloordiv(
|
|||
|
||||
```py
|
||||
self % other
|
||||
pd.Series.mod(self, other, fill_value=None) # mod(), supports substituion of NaNs
|
||||
pd.Series.rmod(self, other, fill_value=None) # rmod(), supports substituion of NaNs
|
||||
ps.DataFrame.mod(self, other, axis=columns, fill_value=None) # mod(), supports substituion of NaNs
|
||||
pd.DataFrame.rmod(self, other, axis=columns, fill_value=None) # rmod(), supports substituion of NaNs
|
||||
pd.Series.mod(self, other, fill_value=None) # mod(), supports substitution of NaNs
|
||||
pd.Series.rmod(self, other, fill_value=None) # rmod(), supports substitution of NaNs
|
||||
pd.DataFrame.mod(self, other, axis=columns, fill_value=None) # mod(), supports substitution of NaNs
|
||||
pd.DataFrame.rmod(self, other, axis=columns, fill_value=None) # rmod(), supports substitution of NaNs
|
||||
# OTHER: {scalar, sequence, Series, DataFrame}
|
||||
# AXIS: {0, 1, index, columns} -- whether to compare by the index or columns
|
||||
# FILLVALUE: {None, float} -- fill missing value
|
||||
|
@ -281,10 +281,10 @@ pd.DataFrame.rmod(self, other, axis=columns, fill_value=None) # rmod(), support
|
|||
|
||||
```py
|
||||
self ** other
|
||||
pd.Series.pow(self, other, fill_value=None) # pow(), supports substituion of NaNs
|
||||
pd.Series.rpow(self, other, fill_value=None) # rpow(), supports substituion of NaNs
|
||||
ps.DataFrame.pow(self, other, axis=columns, fill_value=None) # pow(), supports substituion of NaNs
|
||||
pd.DataFrame.rpow(self, other, axis=columns, fill_value=None) # rpow(), supports substituion of NaNs
|
||||
pd.Series.pow(self, other, fill_value=None) # pow(), supports substitution of NaNs
|
||||
pd.Series.rpow(self, other, fill_value=None) # rpow(), supports substitution of NaNs
|
||||
pd.DataFrame.pow(self, other, axis=columns, fill_value=None) # pow(), supports substitution of NaNs
|
||||
pd.DataFrame.rpow(self, other, axis=columns, fill_value=None) # rpow(), supports substitution of NaNs
|
||||
# OTHER: {scalar, sequence, Series, DataFrame}
|
||||
# AXIS: {0, 1, index, columns} -- whether to compare by the index or columns
|
||||
# FILLVALUE: {None, float} -- fill missing value
|
||||
|
@ -299,7 +299,7 @@ NumPy ufuncs work fine with pandas objects.
|
|||
```py
|
||||
pd.DataFrame.applymap(self, func) # apply function element-wise
|
||||
|
||||
pd.DataFrame.apply(self, func, axis=0, args=()) # apllay a function along an axis of a DataFrame
|
||||
pd.DataFrame.apply(self, func, axis=0, args=()) # apply a function along an axis of a DataFrame
|
||||
# FUNC: {function} -- function to apply
|
||||
# AXIS: {0, 1, index, columns} -- axis along which the function is applied
|
||||
# ARGS: {tuple} -- positional arguments to pass to func in addition to the array/series
|
||||
|
@ -309,7 +309,7 @@ pd.Series.sort_values(self, ascending=True, **kwargs) # sort series by the valu
|
|||
# ASCENDING: {bool} -- if True, sort values in ascending order, otherwise descending -- DEFAULT True
|
||||
# INPLACE: {bool} -- if True, perform operation in-place
|
||||
# KIND: {quicksort, mergesort, heapsort} -- sorting algorithm
|
||||
# NA_POSITION {first, last} -- ‘first’ puts NaNs at the beginning, ‘last’ puts NaNs at the end
|
||||
# NA_POSITION {first, last} -- 'first' puts NaNs at the beginning, 'last' puts NaNs at the end
|
||||
|
||||
pd.DataFrame.sort_index(self, axis=0, ascending=True, **kwargs) # sort object by labels along an axis
|
||||
pd.DataFrame.sort_values(self, axis=0, ascending=True, **kwargs) # sort object by values along an axis
|
||||
|
@ -317,7 +317,7 @@ pd.DataFrame.sort_values(self, axis=0, ascending=True, **kwargs) # sort object
|
|||
# ASCENDING: {bool} -- if True, sort values in ascending order, otherwise descending -- DEFAULT True
|
||||
# INPLACE: {bool} -- if True, perform operation in-place
|
||||
# KIND: {quicksort, mergesort, heapsort} -- sorting algorithm
|
||||
# NA_POSITION {first, last} -- ‘first’ puts NaNs at the beginning, ‘last’ puts NaNs at the end
|
||||
# NA_POSITION {first, last} -- 'first' puts NaNs at the beginning, 'last' puts NaNs at the end
|
||||
```
|
||||
|
||||
## DESCRIPTIVE AND SUMMARY STATISTICS
|
||||
|
@ -332,7 +332,7 @@ pd.DataFrame.count(self, numeric_only=False) # count non-NA cells for each colu
|
|||
|
||||
### DESCRIBE
|
||||
|
||||
Generate descriptive statistics summarizing central tendency, dispersion and shape of dataset’s distribution (exclude NaN).
|
||||
Generate descriptive statistics summarizing central tendency, dispersion and shape of dataset's distribution (exclude NaN).
|
||||
|
||||
```py
|
||||
pd.Series.describe(self, percentiles=None, include=None, exclude=None)
|
||||
|
@ -350,7 +350,7 @@ pd.Series.min(self, skipna=None, numeric_only=None) # minimum of the values for
|
|||
pd.DataFrame.max(self, axis=None, skipna=None, numeric_only=None) # maximum of the values for the requested axis
|
||||
pd.DataFrame.min(self, axis=None, skipna=None, numeric_only=None) # minimum of the values for the requested axis
|
||||
# SKIPNA: {bool} -- exclude NA/null values when computing the result
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not immplemented for Series
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not implemented for Series
|
||||
```
|
||||
|
||||
### IDXMAX - IDXMIN
|
||||
|
@ -381,7 +381,7 @@ pd.Series.sum(self, skipna=None, numeric_only=None, min_count=0) # sum of the v
|
|||
pd.DataFrame.sum(self, axis=None, skipna=None, numeric_only=None, min_count=0) # sum of the values for the requested axis
|
||||
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
|
||||
# SKIPNA: {bool} -- exclude NA/null values when computing the result
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not immplemented for Series
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not implemented for Series
|
||||
# MIN_COUNT: {int} -- required number of valid values to perform the operation. if fewer than min_count non-NA values are present the result will be NA
|
||||
```
|
||||
|
||||
|
@ -392,7 +392,7 @@ pd.Series.mean(self, skipna=None, numeric_only=None) # mean of the values
|
|||
pd.DataFrame.mean(self, axis=None, skipna=None, numeric_only=None) # mean of the values for the requested axis
|
||||
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
|
||||
# SKIPNA: {bool} -- exclude NA/null values when computing the result
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not immplemented for Series
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not implemented for Series
|
||||
```
|
||||
|
||||
### MEDIAN
|
||||
|
@ -402,7 +402,7 @@ pd.Series.median(self, skipna=None, numeric_only=None) # median of the values
|
|||
pd.DataFrame.median(self, axis=None, skipna=None, numeric_only=None) # median of the values for the requested axis
|
||||
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
|
||||
# SKIPNA: {bool} -- exclude NA/null values when computing the result
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not immplemented for Series
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not implemented for Series
|
||||
```
|
||||
|
||||
### MAD (mean absolute deviation)
|
||||
|
@ -422,7 +422,7 @@ pd.DataFrame.var(self, axis=None, skipna=None, ddof=1, numeric_only=None) # un
|
|||
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
|
||||
# SKIPNA: {bool} -- exclude NA/null values. if an entire row/column is NA, the result will be NA
|
||||
# DDOF: {int} -- Delta Degrees of Freedom. divisor used in calculations is N - ddof (N represents the number of elements) -- DEFAULT 1
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not immplemented for Series
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not implemented for Series
|
||||
```
|
||||
|
||||
### STD (standard deviation)
|
||||
|
@ -433,7 +433,7 @@ pd.Dataframe.std(self, axis=None, skipna=None, ddof=1, numeric_only=None) # sam
|
|||
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
|
||||
# SKIPNA: {bool} -- exclude NA/null values. if an entire row/column is NA, the result will be NA
|
||||
# DDOF: {int} -- Delta Degrees of Freedom. divisor used in calculations is N - ddof (N represents the number of elements) -- DEFAULT 1
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not immplemented for Series
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not implemented for Series
|
||||
```
|
||||
|
||||
### SKEW
|
||||
|
@ -443,19 +443,19 @@ pd.Series.skew(self, skipna=None, numeric_only=None) # unbiased skew Normalized
|
|||
pd.DataFrame.skew(self, axis=None, skipna=None, numeric_only=None) # unbiased skew over requested axis Normalized by N-1
|
||||
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
|
||||
# SKIPNA: {bool} -- exclude NA/null values when computing the result
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not immplemented for Series
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not implemented for Series
|
||||
```
|
||||
|
||||
### KURT
|
||||
|
||||
Unbiased kurtosis over requested axis using Fisher’s definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1.
|
||||
Unbiased kurtosis over requested axis using Fisher's definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1.
|
||||
|
||||
```py
|
||||
pd.Series.kurt(self, skipna=None, numeric_only=None)
|
||||
pd.DataFrame.kurt(self, axis=None, skipna=None, numeric_only=None)
|
||||
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
|
||||
# SKIPNA: {bool} -- exclude NA/null values when computing the result
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not immplemented for Series
|
||||
# NUMERIC_ONLY: {bool} -- include only float, int, boolean columns, not implemented for Series
|
||||
```
|
||||
|
||||
### CUMSUM (cumulative sum)
|
||||
|
@ -471,7 +471,7 @@ pd.Dataframe.cumsum(self, axis=None, skipna=True) # cumulative sum over request
|
|||
|
||||
```py
|
||||
pd.Series.cummax(self, skipna=True) # cumulative maximum
|
||||
pd.Series.cummin(self, skipna=True) # cumulative minimumm
|
||||
pd.Series.cummin(self, skipna=True) # cumulative minimum
|
||||
pd.DataFrame.cummax(self, axis=None, skipna=True) # cumulative maximum over requested axis
|
||||
pd.DataFrame.cummin(self, axis=None, skipna=True) # cumulative minimum over requested axis
|
||||
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
|
||||
|
@ -499,7 +499,7 @@ pd.DataFrame.diff(self, periods=1, axis=0)
|
|||
# AXIS: {0, 1, index, columns} -- Take difference over rows or columns
|
||||
```
|
||||
|
||||
### PCT_CAHNGE
|
||||
### PCT_CHANGE
|
||||
|
||||
Percentage change between the current and a prior element.
|
||||
|
||||
|
@ -541,7 +541,7 @@ pd.DataFrame.fillna(self, value=None, method=None, axis=None, inplace=False, lim
|
|||
|
||||
## HIERARCHICAL INDEXING (MultiIndex)
|
||||
|
||||
Enables storing and manupulation of data with an arbitrary number of dimensions.
|
||||
Enables storing and manipulation of data with an arbitrary number of dimensions.
|
||||
In lower dimensional data structures like Series (1d) and DataFrame (2d).
|
||||
|
||||
### MULTIINDEX CREATION
|
||||
|
@ -565,7 +565,7 @@ pd.MultiIndex.get_level_values(self, level)
|
|||
|
||||
### PARTIAL AND CROSS-SECTION SELECTION
|
||||
|
||||
Partial selection “drops” levels of the hierarchical index in the result in a completely analogous way to selecting a column in a regular DataFrame.
|
||||
Partial selection "drops" levels of the hierarchical index in the result in a completely analogous way to selecting a column in a regular DataFrame.
|
||||
|
||||
```py
|
||||
pd.Series.xs(self, key, axis=0, level=None, drop_level=True) # cross-section from Series
|
||||
|
@ -608,7 +608,7 @@ pd.MultiIndex.sortlevel(self, level=0, ascending=True, sort_remaining=True) # s
|
|||
pd.read_fwf(filepath, colspecs='infer', widths=None, infer_nrows=100) # read a table of fixed-width formatted lines into DataFrame
|
||||
# FILEPATH: {str, path object} -- any valid string path is acceptable, could be a URL. Valid URLs: http, ftp, s3, and file
|
||||
# COLSPECS: {list of tuple (int, int), 'infer'} -- list of tuples giving extents of fixed-width fields of each line as half-open intervals { [from, to) }
|
||||
# WIDTHS: {list of int} -- list of field widths which can be used instead of ‘colspecs’ if intervals are contiguous
|
||||
# WIDTHS: {list of int} -- list of field widths which can be used instead of "colspecs" if intervals are contiguous
|
||||
# INFER_NROWS: {int} -- number of rows to consider when letting parser determine colspecs -- DEFAULT 100
|
||||
|
||||
pd.read_excel() # read an Excel file into a pandas DataFrame
|
||||
|
@ -635,7 +635,7 @@ pd.DataFrame.to_csv(self, path_or_buf, sep=',', na_rep='', columns=None, header=
|
|||
# COLUMNS: {sequence} -- columns to write
|
||||
# HEADER: {bool, list of str} -- write out column names. if a list of strings is given, it is assumed to be aliases for column names
|
||||
# INDEX: {bool, list of str} -- write out row names (index)
|
||||
# ENCODING: {str} -- string representing encoding to use -- DEFAULT ‘utf-8’
|
||||
# ENCODING: {str} -- string representing encoding to use -- DEFAULT "utf-8"
|
||||
# LINE_TERMINATOR: {str} -- newline character or character sequence to use in the output file -- DEFAULT os.linesep
|
||||
# DECIMAL: {str} -- character recognized as decimal separator (in EU ,)
|
||||
|
||||
|
|
|
@ -17,18 +17,18 @@ sns.set(style='darkgrid')
|
|||
|
||||
```python
|
||||
sns.relplot(x='name_in_data', y='name_in_data', hue='point_color', size='point_size', style='point_shape', data=data)
|
||||
# HUE, SIZE and STYLE: {name in data} -- used to differenciate points, a sort-of 3rd dimention
|
||||
# HUE, SIZE and STYLE: {name in data} -- used to differentiate points, a sort-of 3rd dimension
|
||||
# hue behaves differently if the data is categorical or numerical, numerical uses a color gradient
|
||||
# SORT: {False, True} -- avoid sorting data in function of x
|
||||
# CI: {None, sd} -- avoid comuting confidence intervals or plot standard deviation
|
||||
# CI: {None, sd} -- avoid computing confidence intervals or plot standard deviation
|
||||
# (aggregate multiple measurements at each x value by plotting the mean and the 95% confidence interval around the mean)
|
||||
# ESTIMATOR: {None} -- turn off aggregation of multiple observations
|
||||
# MARKERS: {True, False} -- evidetiate observations with dots
|
||||
# DASHES: {True, False} -- evidetiate observations with dashes
|
||||
# MARKERS: {True, False} -- highlight observations with dots
|
||||
# DASHES: {True, False} -- highlight observations with dashes
|
||||
# COL, ROW: {name in data} -- categorical variables that will determine the grid of plots
|
||||
# COL_WRAP: {int} -- “Wrap” the column variable at this width, so that the column facets span multiple rows. Incompatible with a row facet.
|
||||
# COL_WRAP: {int} -- "Wrap" the column variable at this width, so that the column facets span multiple rows. Incompatible with a row facet.
|
||||
# SCATTERPLOT
|
||||
# depicts the joint distibution of two variables usinga a cloud of points
|
||||
# depicts the joint distribution of two variables using a cloud of points
|
||||
# kind can be omitted since scatterplot is the default for relplot
|
||||
sns.relplot(kind='scatter') # calls scatterplot()
|
||||
sns.scatterplot() # underlying axis-level function of relplot()
|
||||
|
@ -45,16 +45,16 @@ sns.lineplot() # underlying axis-level function of replot()
|
|||
|
||||
## CATPLOT (categorical)
|
||||
|
||||
Categorical: dicided into discrete groups.
|
||||
Categorical: divided into discrete groups.
|
||||
|
||||
```python
|
||||
sns.catplot(x='name_in_data', y='name_in_data', data=data)
|
||||
# HUE: {name in data} -- used to differenciate points, a sort-of 3rd dimention
|
||||
# HUE: {name in data} -- used to differentiate points, a sort-of 3rd dimension
|
||||
# COL, ROW: {name in data} -- categorical variables that will determine the grid of plots
|
||||
# COL_WRAP: {int} -- “Wrap” the column variable at this width, so that the column facets span multiple rows. Incompatible with a row facet.
|
||||
# ORDER, HUE_ORDER: {list of strings} -- oreder of categorical levels of the plot
|
||||
# COL_WRAP: {int} -- "Wrap" the column variable at this width, so that the column facets span multiple rows. Incompatible with a row facet.
|
||||
# ORDER, HUE_ORDER: {list of strings} -- order of categorical levels of the plot
|
||||
# ROW_ORDER, COL_ORDER: {list of strings} -- order to organize the rows and/or columns of the grid in
|
||||
# ORIENT: {'v', 'h'} -- Orientation of the plot (can also swap x&y assignement)
|
||||
# ORIENT: {'v', 'h'} -- Orientation of the plot (can also swap x&y assignment)
|
||||
# COLOR: {matplotlib color} -- Color for all of the elements, or seed for a gradient palette
|
||||
# CATEGORICAL SCATTERPLOT - STRIPPLOT
|
||||
# adjust the positions of points on the categorical axis with a small amount of random “jitter”
|
||||
|
@ -85,7 +85,7 @@ sns.boxplot()
|
|||
Combines a boxplot with the kernel density estimation procedure.
|
||||
|
||||
```py
|
||||
sns.catplot(kind='violon')
|
||||
sns.catplot(kind='violin')
|
||||
sns.violinplot()
|
||||
```
|
||||
|
||||
|
@ -113,7 +113,7 @@ sns.pointplot()
|
|||
# JOIN: {bool} -- if True, lines will be drawn between point estimates at the same hue level
|
||||
# SCALE: {float} -- scale factor for the plot elements
|
||||
# ERRWIDTH: {float} -- thickness of error bar lines (and caps)
|
||||
# CAPSIZE: {float} -- width of the “caps” on error bars
|
||||
# CAPSIZE: {float} -- width of the "caps" on error bars
|
||||
```
|
||||
|
||||
### CATEGORICAL ESTIMATE - BARPLOT
|
||||
|
@ -126,7 +126,7 @@ sns.barplot()
|
|||
# CI: {float, sd} -- size of confidence intervals to draw around estimated values, sd -> standard deviation
|
||||
# ERRCOLOR: {matplotlib color} -- color for the lines that represent the confidence interval
|
||||
# ERRWIDTH: {float} -- thickness of error bar lines (and caps)
|
||||
# CAPSIZE: {float} -- width of the “caps” on error bars
|
||||
# CAPSIZE: {float} -- width of the "caps" on error bars
|
||||
# DODGE: {bool} -- whether elements should be shifted along the categorical axis if hue is used
|
||||
```
|
||||
|
||||
|
@ -173,7 +173,7 @@ sns.rugplot(a=data) # -> axes obj with plot on it
|
|||
Fit and plot a univariate or bivariate kernel density estimate.
|
||||
|
||||
```py
|
||||
# DATA: {1D array-like} -- inpoy data
|
||||
# DATA: {1D array-like} -- input data
|
||||
sns.kdeplot(data=data)
|
||||
# DATA2: {1D array-like} -- second input data. if present, a bivariate KDE will be estimated.
|
||||
# SHADE: {bool} -- if True, shade-in the area under KDE curve (or draw with filled contours if bivariate)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue