show line numbers in code snippets

This commit is contained in:
Marcello 2023-10-20 18:22:46 +02:00
parent cd1df0e376
commit 255a68d673
82 changed files with 1249 additions and 1251 deletions

View file

@ -2,7 +2,7 @@
## Making the Soup
```py
```py linenums="1"
from bs4 import BeautifulSoup
import requests
@ -24,7 +24,7 @@ Beautiful Soup transforms a complex HTML document into a complex tree of Python
A Tag object corresponds to an XML or HTML tag in the original document
```py
```py linenums="1"
soup = BeautifulSoup('<b class="boldest">Extremely bold</b>', 'html.parser') # parse HTML/XML
tag = soup.b
@ -44,7 +44,7 @@ A string corresponds to a bit of text within a tag. Beautiful Soup uses the `Nav
### Going Down
```py
```py linenums="1"
soup.<tag>.<child_tag> # navigate using tag names
<tag>.contents # direct children as a list
@ -61,14 +61,14 @@ soup.<tag>.<child_tag> # navigate using tag names
### Going Up
```py
```py linenums="1"
<tag>.parent # tag's direct parent (BeautifulSoup has parent None, html has parent BeautifulSoup)
<tag>.parents # iterable over all parents
```
### Going Sideways
```py
```py linenums="1"
<tag>.previous_sibling
<tag>.next_sibling
@ -78,7 +78,7 @@ soup.<tag>.<child_tag> # navigate using tag names
### Going Back and Forth
```py
```py linenums="1"
<tag>.previous_element # whatever was parsed immediately before
<tag>.next_element # whatever was parsed immediately afterwards
@ -90,7 +90,7 @@ soup.<tag>.<child_tag> # navigate using tag names
## Filter Types
```py
```py linenums="1"
soup.find_all("tag") # by name
soup.find_all(["tag1", "tag2"]) # multiple tags in a list
soup.find_all(function) # based on a bool function
@ -107,7 +107,7 @@ Methods arguments:
- `limit` (int). limit number of results
- `**kwargs`: turned into a filter on one of a tag's attributes.
```py
```py linenums="1"
find_all(name, attrs, recursive, string, limit, **kwargs) # several results
find(name, attrs, recursive, string, **kwargs) # one result
@ -135,7 +135,7 @@ soup.select("css_selector") # search for CSS selectors of HTML tags
### Changing Tag Names and Attributes
```py
```py linenums="1"
<tag>.name = "new_html_tag" # modify the tag type
<tag>["attribute"] = "value" # modify the attribute value
del <tag>["attribute"] # remove the attribute

View file

@ -2,7 +2,7 @@
## MOST IMPORTANT ATTRIBUTES
```py
```py linenums="1"
array.ndim # number of axes (dimensions) of the array
array.shape # dimensions of the array, tuple of integers
array.size # total number of elements in the array
@ -15,7 +15,7 @@ array.data # buffer containing the array elements
Unless explicitly specified `np.array` tries to infer a good data type for the array that it creates.
The data type is stored in a special dtype object.
```py
```py linenums="1"
var = np.array(sequence) # creates array
var = np.asarray(sequence) # convert input to array
var = np.ndarray(*sequence) # creates multidimensional array
@ -33,7 +33,7 @@ var = np.linspace(start, stop, num_of_elements) # step of elements calculated b
## DATA TYPES FOR NDARRAYS
```py
```py linenums="1"
var = array.astype(np.dtype) # copy of the array, cast to a specified type
# return TypeError if casting fails
```
@ -72,7 +72,7 @@ array_1 `/` array_2 --> element-wise division (`[1, 2, 3] / [3, 2, 1] = [0.33, 1
## SHAPE MANIPULATION
```py
```py linenums="1"
np.reshape(array, new_shape) # changes the shape of the array
np.ravel(array) # returns the array flattened
array.resize(shape) # modifies the array itself
@ -84,7 +84,7 @@ np.swapaxes(array, first_axis, second_axis) # interchange two axes of an array
## JOINING ARRAYS
```py
```py linenums="1"
np.vstack((array1, array2)) # takes tuple, vertical stack of arrays (row wise)
np.hstack((array1, array2)) # takes a tuple, horizontal stack of arrays (column wise)
np.dstack((array1, array2)) # takes a tuple, depth wise stack of arrays (3rd dimension)
@ -94,7 +94,7 @@ np.concatenate((array1, array2, ...), axis) # joins a sequence of arrays along a
## SPLITTING ARRAYS
```py
```py linenums="1"
np.split(array, indices) # splits an array into equally long sub-arrays (indices is int), if not possible raises error
np.vsplit(array, indices) # splits an array equally into sub-arrays vertically (row wise) if not possible raises error
np.hsplit(array, indices) # splits an array equally into sub-arrays horizontally (column wise) if not possible raises error
@ -104,7 +104,7 @@ np.array_split(array, indices) # splits an array into sub-arrays, arrays can be
## VIEW()
```py
```py linenums="1"
var = array.view() # creates a new array that looks at the same data
# slicing returns a view
# view shapes are separated but assignment changes all arrays
@ -112,7 +112,7 @@ var = array.view() # creates a new array that looks at the same data
## COPY()
```py
```py linenums="1"
var = array.copy() # creates a deep copy of the array
```
@ -136,7 +136,7 @@ iteration on first index, use .flat() to iterate over each element
Functions that perform element-wise operations (vectorization).
```py
```py linenums="1"
np.abs(array) # vectorized abs(), return element absolute value
np.fabs(array) # faster abs() for non-complex values
np.sqrt(array) # vectorized square root (x^0.5)
@ -193,7 +193,7 @@ np.logical_xor(x_array, y_array) # vectorized x ^ y
## CONDITIONAL LOGIC AS ARRAY OPERATIONS
```py
```py linenums="1"
np.where(condition, x, y) # return x if condition == True, y otherwise
```
@ -202,7 +202,7 @@ np.where(condition, x, y) # return x if condition == True, y otherwise
`np.method(array, args)` or `array.method(args)`.
Boolean values are coerced to 1 (`True`) and 0 (`False`).
```py
```py linenums="1"
np.sum(array, axis=None) # sum of array elements over a given axis
np.median(array, axis=None) # median along the specified axis
np.mean(array, axis=None) # arithmetic mean along the specified axis
@ -220,21 +220,21 @@ np.cumprod(array, axis=None) # cumulative sum of the elements along a given axi
## METHODS FOR BOOLEAN ARRAYS
```py
```py linenums="1"
np.all(array, axis=None) # test whether all array elements along a given axis evaluate to True
np.any(array, axis=None) # test whether any array element along a given axis evaluates to True
```
## SORTING
```py
```py linenums="1"
array.sort(axis=-1) # sort an array in-place (axis = None applies on flattened array)
np.sort(array, axis=-1) # return a sorted copy of an array (axis = None applies on flattened array)
```
## SET LOGIC
```py
```py linenums="1"
np.unique(array) # sorted unique elements of an array
np.intersect1d(x, y) # sorted common elements in x and y
np.union1d(x, y) # sorted union of elements
@ -245,7 +245,7 @@ np.setxor1d() # Set symmetric differences; elements that are in either of the a
## FILE I/O WITH ARRAYS
```py
```py linenums="1"
np.save(file, array) # save array to binary file in .npy format
np.savez(file, *array) # save several arrays into a single file in uncompressed .npz format
np.savez_compressed(file, *args, **kwargs) # save several arrays into a single file in compressed .npz format
@ -266,7 +266,7 @@ np.loadtxt(file, dtype=float, comments="#", delimiter=None)
## LINEAR ALGEBRA
```py
```py linenums="1"
np.diag(array, k=0) # extract a diagonal or construct a diagonal array
# K: {int} -- k>0 diagonals above main diagonal, k<0 diagonals below main diagonal (main diagonal k = 0)
@ -290,7 +290,7 @@ np.linalg.lstsq(A, B) # return the least-squares solution to a linear matrix eq
## RANDOM NUMBER GENERATION
```py
```py linenums="1"
np.random.seed()
np.random.rand()
np.random.randn()

View file

@ -2,7 +2,7 @@
## Basic Pandas Imports
```py
```py linenums="1"
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
@ -13,7 +13,7 @@ from pandas import Series, DataFrame
1-dimensional labelled array, axis label referred as INDEX.
Index can contain repetitions.
```py
```py linenums="1"
s = Series(data, index=index, name='name')
# DATA: {python dict, ndarray, scalar value}
# NAME: {string}
@ -22,7 +22,7 @@ s = Series(dict) # Series created from python dict, dict keys become index valu
### INDEXING / SELECTION / SLICING
```py
```py linenums="1"
s['index'] # selection by index label
s[condition] # return slice selected by condition
s[ : ] # slice endpoint included
@ -34,7 +34,7 @@ s[condition] = *value # modify slice by condition
Missing data appears as NaN (Not a Number).
```py
```py linenums="1"
pd.isnull(array) # return a Series index-bool indicating which indexes don't have data
pd.notnull(array) # return a Series index-bool indicating which indexes have data
array.isnull()
@ -43,7 +43,7 @@ array.notnull()
### SERIES ATTRIBUTES
```py
```py linenums="1"
s.values # NumPy representation of Series
s.index # index object of Series
s.name = "Series name" # renames Series object
@ -52,7 +52,7 @@ s.index.name = "index name" # renames index
### SERIES METHODS
```py
```py linenums="1"
pd.Series.isin(self, values) # boolean Series showing whether elements in Series matches elements in values exactly
# Conform Series to new index, new object produced unless the new index is equivalent to current one and copy=False
@ -80,7 +80,7 @@ pd.Series.value_counts(self, normalize=False, sort=True, ascending=False, bins=N
2-dimensional labeled data structure with columns of potentially different types.
Index and columns can contain repetitions.
```py
```py linenums="1"
df = DataFrame(data, index=row_labels, columns=column_labels)
# DATA: {list, dict (of lists), nested dicts, series, dict of 1D ndarray, 2D ndarray, DataFrame}
# INDEX: {list of row_labels}
@ -112,7 +112,7 @@ del df[col] # delete column
### DATAFRAME ATTRIBUTES
```py
```py linenums="1"
df.index # row labels
df.columns # column labels
df.values # NumPy representation of DataFrame
@ -123,7 +123,7 @@ df.T # transpose
### DATAFRAME METHODS
```py
```py linenums="1"
pd.DataFrame.isin(self , values) # boolean DataFrame showing whether elements in DataFrame matches elements in values exactly
# Conform DataFrame to new index, new object produced unless the new index is equivalent to current one and copy=False
@ -146,7 +146,7 @@ Holds axis labels and metadata, immutable.
### INDEX TYPES
```py
```py linenums="1"
pd.Index # immutable ordered ndarray, sliceable. stores axis labels
pd.Int64Index # special case of Index with purely integer labels
pd.MultiIndex # multi-level (hierarchical) index object for pandas objects
@ -156,7 +156,7 @@ pd.DatetimeIndex # nanosecond timestamps (uses Numpy datetime64)
### INDEX ATTRIBUTES
```py
```py linenums="1"
pd.Index.is_monotonic_increasing # Return True if the index is monotonic increasing (only equal or increasing) values
pd.Index.is_monotonic_decreasing # Return True if the index is monotonic decreasing (only equal or decreasing) values
pd.Index.is_unique # Return True if the index has unique values.
@ -165,7 +165,7 @@ pd.Index.hasnans # Return True if the index has NaNs
### INDEX METHODS
```py
```py linenums="1"
pd.Index.append(self, other) # append a collection of Index options together
pd.Index.difference(self, other, sort=None) # set difference of two Index objects
@ -197,7 +197,7 @@ Missing values propagate in arithmetic computations (NaN `<operator>` value = Na
### ADDITION
```py
```py linenums="1"
self + other
pd.Series.add(self, other, fill_value=None) # add(), supports substitution of NaNs
pd.Series.radd(self, other, fill_value=None) # radd(), supports substitution of NaNs
@ -210,7 +210,7 @@ pd.DataFrame.radd(self, other, axis=columns, fill_value=None) # radd(), support
### SUBTRACTION
```py
```py linenums="1"
self - other
pd.Series.sub(self, other, fill_value=None) # sub(), supports substitution of NaNs
pd.Series.rsub(self, other, fill_value=None) # rsub(), supports substitution of NaNs
@ -223,7 +223,7 @@ pd.DataFrame.rsub(self, other, axis=columns, fill_value=None) # rsub(), support
### MULTIPLICATION
```py
```py linenums="1"
self * other
pd.Series.mul(self, other, fill_value=None) # mul(), supports substitution of NaNs
pd.Series.rmul(self, other, fill_value=None) # rmul(), supports substitution of NaNs
@ -236,7 +236,7 @@ pd.DataFrame.rmul(self, other, axis=columns, fill_value=None) # rmul(), support
### DIVISION (float division)
```py
```py linenums="1"
self / other
pd.Series.div(self, other, fill_value=None) # div(), supports substitution of NaNs
pd.Series.rdiv(self, other, fill_value=None) # rdiv(), supports substitution of NaNs
@ -253,7 +253,7 @@ pd.DataFrame.rtruediv(self, other, axis=columns, fill_value=None) # rtruediv(),
### FLOOR DIVISION
```py
```py linenums="1"
self // other
pd.Series.floordiv(self, other, fill_value=None) # floordiv(), supports substitution of NaNs
pd.Series.rfloordiv(self, other, fill_value=None) # rfloordiv(), supports substitution of NaNs
@ -266,7 +266,7 @@ pd.DataFrame.rfloordiv(self, other, axis=columns, fill_value=None) # rfloordiv(
### MODULO
```py
```py linenums="1"
self % other
pd.Series.mod(self, other, fill_value=None) # mod(), supports substitution of NaNs
pd.Series.rmod(self, other, fill_value=None) # rmod(), supports substitution of NaNs
@ -279,7 +279,7 @@ pd.DataFrame.rmod(self, other, axis=columns, fill_value=None) # rmod(), support
### POWER
```py
```py linenums="1"
self ** other
pd.Series.pow(self, other, fill_value=None) # pow(), supports substitution of NaNs
pd.Series.rpow(self, other, fill_value=None) # rpow(), supports substitution of NaNs
@ -296,7 +296,7 @@ pd.DataFrame.rpow(self, other, axis=columns, fill_value=None) # rpow(), support
NumPy ufuncs work fine with pandas objects.
```py
```py linenums="1"
pd.DataFrame.applymap(self, func) # apply function element-wise
pd.DataFrame.apply(self, func, axis=0, args=()) # apply a function along an axis of a DataFrame
@ -324,7 +324,7 @@ pd.DataFrame.sort_values(self, axis=0, ascending=True, **kwargs) # sort object
### COUNT
```py
```py linenums="1"
pd.Series.count(self) # return number of non-NA/null observations in the Series
pd.DataFrame.count(self, numeric_only=False) # count non-NA cells for each column or row
# NUMERIC_ONLY: {bool} -- Include only float, int or boolean data -- DEFAULT False
@ -334,7 +334,7 @@ pd.DataFrame.count(self, numeric_only=False) # count non-NA cells for each colu
Generate descriptive statistics summarizing central tendency, dispersion and shape of dataset's distribution (exclude NaN).
```py
```py linenums="1"
pd.Series.describe(self, percentiles=None, include=None, exclude=None)
pd.DataFrame.describe(self, percentiles=None, include=None, exclude=None)
# PERCENTILES: {list-like of numbers} -- percentiles to include in output,between 0 and 1 -- DEFAULT [.25, .5, .75]
@ -344,7 +344,7 @@ pd.DataFrame.describe(self, percentiles=None, include=None, exclude=None)
### MAX - MIN
```py
```py linenums="1"
pd.Series.max(self, skipna=None, numeric_only=None) # maximum of the values for the requested axis
pd.Series.min(self, skipna=None, numeric_only=None) # minimum of the values for the requested axis
pd.DataFrame.max(self, axis=None, skipna=None, numeric_only=None) # maximum of the values for the requested axis
@ -355,7 +355,7 @@ pd.DataFrame.min(self, axis=None, skipna=None, numeric_only=None) # minimum of
### IDXMAX - IDXMIN
```py
```py linenums="1"
pd.Series.idxmax(self, skipna=True) # row label of the maximum value
pd.Series.idxmin(self, skipna=True) # row label of the minimum value
pd.DataFrame.idxmax(self, axis=0, skipna=True) # Return index of first occurrence of maximum over requested axis
@ -366,7 +366,7 @@ pd.DataFrame.idxmin(self, axis=0, skipna=True) # Return index of first occurre
### QUANTILE
```py
```py linenums="1"
pd.Series.quantile(self, q=0.5, interpolation='linear') # return values at the given quantile
pd.DataFrame.quantile(self, q=0.5, axis=0, numeric_only=True, interpolation='linear') # return values at the given quantile over requested axis
# Q: {float, array} -- value between 0 <= q <= 1, the quantile(s) to compute -- DEFAULT 0.5 (50%)
@ -376,7 +376,7 @@ pd.DataFrame.quantile(self, q=0.5, axis=0, numeric_only=True, interpolation='lin
### SUM
```py
```py linenums="1"
pd.Series.sum(self, skipna=None, numeric_only=None, min_count=0) # sum of the values
pd.DataFrame.sum(self, axis=None, skipna=None, numeric_only=None, min_count=0) # sum of the values for the requested axis
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -387,7 +387,7 @@ pd.DataFrame.sum(self, axis=None, skipna=None, numeric_only=None, min_count=0)
### MEAN
```py
```py linenums="1"
pd.Series.mean(self, skipna=None, numeric_only=None) # mean of the values
pd.DataFrame.mean(self, axis=None, skipna=None, numeric_only=None) # mean of the values for the requested axis
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -397,7 +397,7 @@ pd.DataFrame.mean(self, axis=None, skipna=None, numeric_only=None) # mean of th
### MEDIAN
```py
```py linenums="1"
pd.Series.median(self, skipna=None, numeric_only=None) # median of the values
pd.DataFrame.median(self, axis=None, skipna=None, numeric_only=None) # median of the values for the requested axis
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -407,7 +407,7 @@ pd.DataFrame.median(self, axis=None, skipna=None, numeric_only=None) # median o
### MAD (mean absolute deviation)
```py
```py linenums="1"
pd.Series.mad(self, skipna=None) # mean absolute deviation
pd.DataFrame.mad(self, axis=None, skipna=None) # mean absolute deviation of the values for the requested axis
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -416,7 +416,7 @@ pd.DataFrame.mad(self, axis=None, skipna=None) # mean absolute deviation of the
### VAR (variance)
```py
```py linenums="1"
pd.Series.var(self, skipna=None, numeric_only=None) # unbiased variance
pd.DataFrame.var(self, axis=None, skipna=None, ddof=1, numeric_only=None) # unbiased variance over requested axis
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -427,7 +427,7 @@ pd.DataFrame.var(self, axis=None, skipna=None, ddof=1, numeric_only=None) # un
### STD (standard deviation)
```py
```py linenums="1"
pd.Series.std(self, skipna=None, ddof=1, numeric_only=None) # sample standard deviation
pd.DataFrame.std(self, axis=None, skipna=None, ddof=1, numeric_only=None) # sample standard deviation over requested axis
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -438,7 +438,7 @@ pd.Dataframe.std(self, axis=None, skipna=None, ddof=1, numeric_only=None) # sam
### SKEW
```py
```py linenums="1"
pd.Series.skew(self, skipna=None, numeric_only=None) # unbiased skew Normalized by N-1
pd.DataFrame.skew(self, axis=None, skipna=None, numeric_only=None) # unbiased skew over requested axis Normalized by N-1
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -450,7 +450,7 @@ pd.DataFrame.skew(self, axis=None, skipna=None, numeric_only=None) # unbiased
Unbiased kurtosis over requested axis using Fisher's definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1.
```py
```py linenums="1"
pd.Series.kurt(self, skipna=None, numeric_only=None)
pd.DataFrame.kurt(self, axis=None, skipna=None, numeric_only=None)
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -460,7 +460,7 @@ pd.Dataframe.kurt(self, axis=None, skipna=None, numeric_only=None)
### CUMSUM (cumulative sum)
```py
```py linenums="1"
pd.Series.cumsum(self, skipna=True) # cumulative sum
pd.DataFrame.cumsum(self, axis=None, skipna=True) # cumulative sum over requested axis
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -469,7 +469,7 @@ pd.Dataframe.cumsum(self, axis=None, skipna=True) # cumulative sum over request
### CUMMAX - CUMMIN (cumulative maximum - minimum)
```py
```py linenums="1"
pd.Series.cummax(self, skipna=True) # cumulative maximum
pd.Series.cummin(self, skipna=True) # cumulative minimum
pd.DataFrame.cummax(self, axis=None, skipna=True) # cumulative maximum over requested axis
@ -480,7 +480,7 @@ pd.Dataframe.cummin(self, axis=None, skipna=True) # cumulative minimum over req
### CUMPROD (cumulative product)
```py
```py linenums="1"
pd.Series.cumprod(self, skipna=True) # cumulative product
pd.DataFrame.cumprod(self, axis=None, skipna=True) # cumulative product over requested axis
# AXIS: {0, 1, index, columns} -- axis for the function to be applied on
@ -492,7 +492,7 @@ pd.Dataframe.cumprod(self, axis=None, skipna=True) # cumulative product over re
Calculates the difference of a DataFrame element compared with another element in the DataFrame.
(default is the element in the same column of the previous row)
```py
```py linenums="1"
pd.Series.diff(self, periods=1)
pd.DataFrame.diff(self, periods=1, axis=0)
# PERIODS: {int} -- Periods to shift for calculating difference, accepts negative values -- DEFAULT 1
@ -503,7 +503,7 @@ pd.DataFrame.diff(self, periods=1, axis=0)
Percentage change between the current and a prior element.
```py
```py linenums="1"
pd.Series.pct_change(self, periods=1, fill_method='pad', limit=None, freq=None)
pd.DataFrame.pct_change(self, periods=1, fill_method='pad', limit=None)
# PERIODS:{int} -- periods to shift for forming percent change
@ -515,7 +515,7 @@ pd.Dataframe.pct_change(self, periods=1, fill_method='pad', limit=None)
### FILTERING OUT MISSING DATA
```py
```py linenums="1"
pd.Series.dropna(self, inplace=False) # return a new Series with missing values removed
pd.DataFrame.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False) # return a new DataFrame with missing values removed
# AXIS: {tuple, list} -- tuple or list to drop on multiple axes. only a single axis is allowed
@ -529,7 +529,7 @@ pd.DataFrame.dropna(axis=0, how='any', tresh=None, subset=None, inplace=False)
Fill NA/NaN values using the specified method.
```py
```py linenums="1"
pd.Series.fillna(self, value=None, method=None, inplace=False, limit=None)
pd.DataFrame.fillna(self, value=None, method=None, axis=None, inplace=False, limit=None)
# VALUE: {scalar, dict, Series, DataFrame} -- value to use to fill holes, dict/Series/DataFrame specifying which value to use for each index or column
@ -546,7 +546,7 @@ In lower dimensional data structures like Series (1d) and DataFrame (2d).
### MULTIINDEX CREATION
```py
```py linenums="1"
pd.MultiIndex.from_arrays(*arrays, names=None) # convert arrays to MultiIndex
pd.MultiIndex.from_tuples(*arrays, names=None) # convert tuples to MultiIndex
pd.MultiIndex.from_frame(df, names=None) # convert DataFrame to MultiIndex
@ -559,7 +559,7 @@ pd.DataFrame(*arrays) # Index constructor makes MultiINdex from DataFrame
Vector of label values for requested level, equal to the length of the index.
```py
```py linenums="1"
pd.MultiIndex.get_level_values(self, level)
```
@ -567,7 +567,7 @@ pd.MultiIndex.get_level_values(self, level)
Partial selection "drops" levels of the hierarchical index in the result in a completely analogous way to selecting a column in a regular DataFrame.
```py
```py linenums="1"
pd.Series.xs(self, key, axis=0, level=None, drop_level=True) # cross-section from Series
pd.DataFrame.xs(self, key, axis=0, level=None, drop_level=True) # cross-section from DataFrame
# KEY: {label, tuple of label} -- label contained in the index, or partially in a MultiIndex
@ -580,7 +580,7 @@ pd.DataFrame.xs(self, key, axis=0, level=None, drop_level=True) # cross-sectio
Multi index keys take the form of tuples.
```py
```py linenums="1"
df.loc[('lvl_1', 'lvl_2', ...)] # selection of single row
df.loc[('idx_lvl_1', 'idx_lvl_2', ...), ('col_lvl_1', 'col_lvl_2', ...)] # selection of single value
@ -591,7 +591,7 @@ df.loc[('idx_lvl_1', 'idx_lvl_2') : ('idx_lvl_1', 'idx_lvl_2')] # slice of rows
### REORDERING AND SORTING LEVELS
```py
```py linenums="1"
pd.MultiIndex.swaplevel(self, i=-2, j=-1) # swap level i with level j
pd.Series.swaplevel(self, i=-2, j=-1) # swap levels i and j in a MultiIndex
pd.DataFrame.swaplevel(self, i=-2, j=-1, axis=0) # swap levels i and j in a MultiIndex on a particular axis
@ -604,7 +604,7 @@ pd.MultiIndex.sortlevel(self, level=0, ascending=True, sort_remaining=True) # s
## DATA LOADING, STORAGE FILE FORMATS
```py
```py linenums="1"
pd.read_fwf(filepath, colspecs='infer', widths=None, infer_nrows=100) # read a table of fixed-width formatted lines into DataFrame
# FILEPATH: {str, path object} -- any valid string path is acceptable, could be a URL. Valid URLs: http, ftp, s3, and file
# COLSPECS: {list of tuple (int, int), 'infer'} -- list of tuples giving extents of fixed-width fields of each line as half-open intervals { [from, to) }

View file

@ -4,7 +4,7 @@
Get or retrieve data from specified resource
```py
```py linenums="1"
response = requests.get('URL') # returns response object
# PAYLOAD -> valuable information of response
@ -18,7 +18,7 @@ The response message consists of:
- empty line
- optional message body
```text
```text linenums="1"
1xx -> INFORMATIONAL RESPONSE
2xx -> SUCCESS
200 OK -> request successful
@ -28,7 +28,7 @@ The response message consists of:
5xx -> SERVER ERRORS
```
```py
```py linenums="1"
# raise exception HTTPError for error status codes
response.raise_for_status()
@ -41,7 +41,7 @@ response.headers # response headers (dict)
### QUERY STRING PARAMETERS
```py
```py linenums="1"
response = requests.get('URL', params={'q':'query'})
response = requests.get('URL', params=[('q', 'query')])
response = requests.get('URL', params=b'q=query')
@ -49,7 +49,7 @@ response = requests.get('URL', params=b'q=query')
### REQUEST HEADERS
```py
```py linenums="1"
response = requests.get(
'URL',
params={'q': 'query'},
@ -61,7 +61,7 @@ response = requests.get(
### DATA INPUT
```py
```py linenums="1"
# requests that entity enclosed be stored as a new subordinate of the web resource identified by the URI
requests.post('URL', data={'key':'value'})
# requests that the enclosed entity be stored under the supplied URI
@ -78,13 +78,13 @@ requests.options('URL')
### SENDING JSON DATA
```py
```py linenums="1"
requests.post('URL', json={'key': 'value'})
```
### INSPECTING THE REQUEST
```py
```py linenums="1"
# requests lib prepares the requests before sending it
response = requests.post('URL', data={'key':'value'})
response.request.something # inspect request field
@ -92,7 +92,7 @@ response.request.something # inspect request field
## AUTHENTICATION
```py
```py linenums="1"
requests.get('URL', auth=('username', 'password')) # use implicit HTTP Basic Authorization
# explicit HTTP Basic Authorization and other
@ -103,7 +103,7 @@ requests.get('URL', auth=HTTPBasicAuth('username', getpass()))
### PERSONALIZED AUTH
```py
```py linenums="1"
from requests.auth import AuthBase
class TokenAuth(AuthBase):
"custom authentication scheme"
@ -121,7 +121,7 @@ requests.get('URL', auth=TokenAuth('1234abcde-token'))
### DISABLING SSL VERIFICATION
```py
```py linenums="1"
requests.get('URL', verify=False)
```
@ -129,14 +129,14 @@ requests.get('URL', verify=False)
### REQUEST TIMEOUT
```py
```py linenums="1"
# raise Timeout exception if request times out
requests.get('URL', timeout=(connection_timeout, read_timeout))
```
### MAX RETRIES
```py
```py linenums="1"
from requests.adapters import HTTPAdapter
URL_adapter = HTTPAdapter(max_retries = int)
session = requests.Session()

View file

@ -2,7 +2,7 @@
## Basic Imports For Seaborn
```python
```python linenums="1"
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
@ -15,7 +15,7 @@ sns.set(style='darkgrid')
## RELPLOT (relationship)
```python
```python linenums="1"
sns.relplot(x='name_in_data', y='name_in_data', hue='point_color', size='point_size', style='point_shape', data=data)
# HUE, SIZE and STYLE: {name in data} -- used to differentiate points, a sort-of 3rd dimension
# hue behaves differently if the data is categorical or numerical, numerical uses a color gradient
@ -38,7 +38,7 @@ sns.scatterplot() # underlying axis-level function of replot()
Using semantics in lineplot will determine the aggregation of data.
```python
```python linenums="1"
sns.relplot(ci=None, sort=bool, kind='line')
sns.lineplot() # underlying axis-level function of relplot()
```
@ -47,7 +47,7 @@ sns.lineplot() # underlying axis-level function of replot()
Categorical: divided into discrete groups.
```python
```python linenums="1"
sns.catplot(x='name_in_data', y='name_in_data', data=data)
# HUE: {name in data} -- used to differentiate points, a sort-of 3rd dimension
# COL, ROW: {name in data} -- categorical variables that will determine the grid of plots
@ -68,7 +68,7 @@ sns.stripplot()
Adjusts the points along the categorical axis preventing overlap.
```py
```py linenums="1"
sns.catplot(kind='swarm')
sns.swarmplot()
# SIZE: {float} -- Diameter of the markers, in points
@ -84,7 +84,7 @@ sns.boxplot()
Combines a boxplot with the kernel density estimation procedure.
```py
```py linenums="1"
sns.catplot(kind='violin')
sns.violinplot()
```
@ -94,7 +94,7 @@ sns.violonplot()
Plot similar to boxplot but optimized for showing more information about the shape of the distribution.
It is best suited for larger datasets.
```py
```py linenums="1"
sns.catplot(kind='boxen')
sns.boxenplot()
```
@ -103,7 +103,7 @@ sns.boxenplot()
Show point estimates and confidence intervals using scatter plot glyphs.
```py
```py linenums="1"
sns.catplot(kind='point')
sns.pointplot()
# CI: {float, sd} -- size of confidence intervals to draw around estimated values, sd -> standard deviation
@ -120,7 +120,7 @@ sns.pointplot()
Show point estimates and confidence intervals as rectangular bars.
```py
```py linenums="1"
sns.catplot(kind='bar')
sns.barplot()
# CI: {float, sd} -- size of confidence intervals to draw around estimated values, sd -> standard deviation
@ -134,7 +134,7 @@ sns.barplot()
Show the counts of observations in each categorical bin using bars.
```py
```py linenums="1"
sns.catplot(kind='count')
sns.countplot()
# DODGE: {bool} -- whether elements should be shifted along the categorical axis if hue is used
@ -146,7 +146,7 @@ sns.countplot()
Flexibly plot a univariate distribution of observations
```py
```py linenums="1"
# A: {series, 1d-array, list}
sns.distplot(a=data)
# BINS: {None, arg for matplotlib hist()} -- specification of hist bins, or None to use Freedman-Diaconis rule
@ -160,7 +160,7 @@ sns.distplot(a=data)
Plot datapoints in an array as sticks on an axis.
```py
```py linenums="1"
# A: {vector} -- 1D array of observations
sns.rugplot(a=data) # -> axes obj with plot on it
# HEIGHT: {scalar} -- height of ticks as proportion of the axis
@ -172,7 +172,7 @@ sns.rugplot(a=data) # -> axes obj with plot on it
Fit and plot a univariate or bivariate kernel density estimate.
```py
```py linenums="1"
# DATA: {1D array-like} -- input data
sns.kdeplot(data=data)
# DATA2 {1D array-like} -- second input data. if present, a bivariate KDE will be estimated.
@ -185,7 +185,7 @@ sns.kdeplot(data=data)
Draw a plot of two variables with bivariate and univariate graphs.
```py
```py linenums="1"
# X, Y: {string, vector} -- data or names of variables in data
sns.jointplot(x=data, y=data)
# DATA:{pandas DataFrame} -- DataFrame when x and y are variable names
@ -203,7 +203,7 @@ sns.jointplot(x=data, y=data)
Plot pairwise relationships in a dataset.
```py
```py linenums="1"
# DATA: {pandas DataFrame} -- tidy (long-form) dataframe where each column is a variable and each row is an observation
sns.pairplot(data=pd.DataFrame)
# HUE: {string (variable name)} -- variable in data to map plot aspects to different colors

View file

@ -2,7 +2,7 @@
## Standard Imports
```py
```py linenums="1"
from tkinter import * # import Python Tk Binding
from tkinter import ttk # import Themed Widgets
```
@ -19,7 +19,7 @@ geometry managers determine size and oder widget drawing properties
event loop receives events from the OS
customizable events provide a callback as a widget configuration
```py
```py linenums="1"
widget.bind('event', function) # method to capture any event and then execute an arbitrary piece of code (generally a function or lambda)
```
@ -29,7 +29,7 @@ VIRTUAL EVENT --> high-level event generated by widget (listed in widget docs)
Widgets are objects and all things on screen. All widgets are children of a window.
```py
```py linenums="1"
widget_name = tk_object(parent_window) # widget is inserted into widget hierarchy
```
@ -37,7 +37,7 @@ widget_name = tk_object(parent_window) # widget is inserted into widget hierarc
Displays a single rectangle, used as container for other widgets
```py
```py linenums="1"
frame = ttk.Frame(parent, width=None, height=None, borderwidth=num:int)
# BORDERWIDTH: sets frame border width (default: 0)
# width, height MUST be specified if frame is empty, otherwise determined by parent geometry manager
@ -47,7 +47,7 @@ frame = ttk.Frame(parent, width=None, height=None, borderwidth=num:int)
Extra space inside widget (margin).
```py
```py linenums="1"
frame['padding'] = num # same padding for every border
frame['padding'] = (horizontal, vertical) # set horizontal THEN vertical padding
frame['padding'] = (left, top, right, bottom) # set left, top, right, bottom padding
@ -60,13 +60,13 @@ frame['relief'] = border_style
Display text or image without interactivity.
```py
```py linenums="1"
label = ttk.Label(parent, text='label text')
```
### DEFINING UPDATING LABEL
```py
```py linenums="1"
var = StringVar() # variable containing text, watches for changes. Use get, set methods to interact with the value
label['textvariable'] = var # attach var to label (only of type StringVar)
var.set("new text label") # change label text
@ -74,14 +74,14 @@ var.set("new text label") # change label text
### DISPLAY IMAGES (2 steps)
```py
```py linenums="1"
image = PhotoImage(file='filename') # create image object
label['image'] = image # use image config
```
### DISPLAY IMAGE AND-OR TEXT
```py
```py linenums="1"
label['compound'] = value
```
@ -97,20 +97,20 @@ Compound value:
Specifies edge or corner that the label is attached.
```py
```py linenums="1"
label['anchor'] = compass_direction #compass_direction: n, ne, e, se, s, sw, w, nw, center
```
### MULTI-LINE TEXT WRAP
```py
```py linenums="1"
# use \n for multi line text
label['wraplength'] = size # max line length
```
### CONTROL TEXT JUSTIFICATION
```py
```py linenums="1"
label['justify'] = value #value: left, center, right
label['relief'] = label_style
@ -120,7 +120,7 @@ label['background'] = color # color passed with name or HEX RGB codes
### FONT STYLE (use with caution)
```py
```py linenums="1"
# used outside style option
label['font'] = font
```
@ -141,19 +141,19 @@ Fonts:
Press to perform some action
```py
```py linenums="1"
button = ttk.Button(parent, text='button_text', command=action_performed)
```
### TEXT or IMAGE
```py
```py linenums="1"
button['text/textvariable'], button['image'], button['compound']
```
### BUTTON INVOCATION
```py
```py linenums="1"
button.invoke() # button activation in the program
```
@ -161,7 +161,7 @@ button.invoke() # button activation in the program
Activate or deactivate the widget.
```py
```py linenums="1"
button.state(['disabled']) # set the disabled flag, disabling the button
button.state(['!disabled']) # clear the disabled flag
button.instate(['disabled']) # return true if the button is disabled, else false
@ -174,7 +174,7 @@ button.instate(['!disabled'], cmd) # execute 'cmd' if the button is not disable
Button with a binary value of some kind (e.g. a toggle) that also invokes a command callback
```py
```py linenums="1"
checkbutton_var = TkVarType
check = ttk.Checkbutton(parent, text='button text', command=action_performed, variable=checkbutton_var, onvalue=value_on, offvalue=value_off)
```
@ -189,7 +189,7 @@ checkbutton won't set the linked variable (MUST be done in the program)
### CONFIG OPTIONS
```py
```py linenums="1"
check['text/textvariable']
check['image']
check['compound']
@ -201,7 +201,7 @@ check.instate(['flag'])
Multiple-choice selection (good if options are few).
```py
```py linenums="1"
#RADIOBUTTON CREATION (usually as a set)
radio_var = TkVarType
radio_1 = ttk.Radiobutton(parent, text='button text', variable=radio_var, value=button_1_value)
@ -221,7 +221,7 @@ radio.instate(['flag'])
Single line text field accepting a string.
```py
```py linenums="1"
entry_var = StringVar()
entry = ttk.Entry(parent, textvariable=entry_var, width=char_num, show=symbol)
# SHOW: replaces the entry text with symbol, used for passwords
@ -230,7 +230,7 @@ entry = ttk.Entry(parent, textvariable=entry_var, width=char_num, show=symbol)
### CHANGE ENTRY VALUE
```py
```py linenums="1"
entry.get() # returns entry value
entry.delete(start, 'end') # delete between two indices, 0-based
entry.insert(index, 'text value') # insert new text at a given index
@ -238,7 +238,7 @@ entry.insert(index, 'text value') # insert new text at a given index
### ENTRY CONFIG OPTIONS
```py
```py linenums="1"
radio.state(['flag'])
radio.instate(['flag'])
```
@ -247,7 +247,7 @@ radio.instate(['flag'])
Drop-down list of available options.
```py
```py linenums="1"
combobox_var = StringVar()
combo = ttk.Combobox(parent, textvariable=combobox_var)
combobox.get() # return combobox current value
@ -259,7 +259,7 @@ combobox.bind('<<ComboboxSelected>>', function)
### PREDEFINED VALUES
```py
```py linenums="1"
combobox['values'] = (value_1, value_2, ...) # provides a list of choose-able values
combobox.state(['readonly']) # restricts choose-able values to those provided with 'values' config option
# SUGGESTION: call selection clear method on value change (on ComboboxSelected event) to avoid visual oddities
@ -269,7 +269,7 @@ combobox.state(['readonly']) # restricts choose-able values to those provided w
Display list of single-line items, allows browsing and multiple selection (part of Tk classic, missing in themed Tk widgets).
```py
```py linenums="1"
lstbx = Listbox(parent, height=num, listvariable=item_list:list)
# listvariable links a variable (MUST BE a list) to the listbox, each element is an item of the listbox
# manipulation of the list changes the listbox
@ -277,7 +277,7 @@ lstbx = Listbox(parent, height=num, listvariable=item_list:list)
### SELECTING ITEMS
```py
```py linenums="1"
lstbx['selectmode'] = mode # MODE: browse (single selection), extended (multiple selection)
lstbx.curselection() # returns list of indices of selected items
# on selection change: generate event <ListboxSelect>
@ -288,7 +288,7 @@ lstbx.curselection() # returns list of indices of selected items
## SCROLLBAR
```py
```py linenums="1"
scroll = ttk.Scrollbar(parent, orient=direction, command=widget.view)
# ORIENT: VERTICAL, HORIZONTAL
# WIDGET.VIEW: .xview, .yview
@ -301,7 +301,7 @@ widget.configure(yscrollcommand=scroll.set)
Box in right bottom of widget, allows resize.
```py
```py linenums="1"
ttk.Sizegrip(parent).grid(column=999, row=999, sticky=(S, E))
```
@ -309,7 +309,7 @@ ttk.Sizegrip(parent).grid(column=999, row=999, sticky=(S, E))
Area accepting multiple lines of text.
```py
```py linenums="1"
txt = Text(parent, width=num:int, height=num:int, wrap=flag) # width is character num, height is row num
# FLAG: none (no wrapping), char (wrap at every character), word (wrap at word boundaries)
txt['state'] = flag # FLAG: disabled, normal
@ -323,7 +323,7 @@ txt.delete(start, end) # delete range of text
Feedback about the progress of a lengthy operation.
```py
```py linenums="1"
progbar = ttk.Progressbar(parent, orient=direction, length=num:int, value=num, maximum=num:float mode=mode)
# DIRECTION: VERTICAL, HORIZONTAL
# MODE: determinate (relative progress of completion), indeterminate (no estimate of completion)
@ -334,13 +334,13 @@ progbar = ttk.Progressbar(parent, orient=direction, length=num:int, value=num, m
### DETERMINATE PROGRESS
```py
```py linenums="1"
progbar.step(amount) # increment value of given amount (DEFAULT: 1.0)
```
### INDETERMINATE PROGRESS
```py
```py linenums="1"
progbar.start() # starts progressbar
progbar.stop() # stops progressbar
```
@ -349,7 +349,7 @@ progbar.stop() #stoops progressbar
Provide a numeric value through direct manipulation.
```py
```py linenums="1"
scale = ttk.Scale(parent, orient=DIR, length=num:int, from_=num:float, to=num:float, command=cmd)
# COMMAND: calls cmd at every scale change, appends current value to func call
scale['value'] # set or read current value
@ -361,7 +361,7 @@ scale.get() # get current value
Choose numbers. The spinbox chooses items from a list; the arrows permit cycling through the list items.
```py
```py linenums="1"
spinval = StringVar()
spin = Spinbox(parent, from_=num, to=num, textvariable=spinval, increment=num, value=lst, wrap=boolean)
# INCREMENT specifies increment\decrement by arrow button
@ -402,7 +402,7 @@ A single value for the option puts the same padding on both left and right (or t
while a two-value list lets you put different amounts on left and right (or top and bottom).
To add padding around an entire row or column, the "columnconfigure" and "rowconfigure" methods accept a "pad" option.
```py
```py linenums="1"
widget.grid(column=num, row=num, columnspan=num, rowspan=num, sticky=(), padx=num, pady=num) # sticky: N, S, E, W
widget.columnconfigure(pad=num, weight=num)
widget.rowconfigure(pad=num, weight=num)
@ -419,7 +419,7 @@ widget.grid_remove(slaves) # takes a list of slaves, removes slaves from grid (
### CREATING TOPLEVEL WINDOW
```py
```py linenums="1"
tlw = Toplevel(parent) # parent of root window, no need to grid it
window.destroy()
@ -429,7 +429,7 @@ window.destroy()
### CHANGING BEHAVIOR AND STYLE
```py
```py linenums="1"
# WINDOW TITLE
window.title() # returns title of the window
window.title('new title') # sets title
@ -469,7 +469,7 @@ window.deiconify() # deiconifies window
### STANDARD DIALOGS
```py
```py linenums="1"
# SELECTING FILES AND DIRECTORIES
# on Windows and Mac invokes underlying OS dialogs directly
from tkinter import filedialog
@ -507,7 +507,7 @@ POSSIBLE ALERT/CONFIRMATION RETURN VALUES:
## SEPARATOR
```py
```py linenums="1"
# horizontal or vertical line between groups of widgets
separator = ttk.Separator(parent, orient=direction)
# DIRECTION: horizontal, vertical
@ -532,7 +532,7 @@ pw.forget(position) # remove widget from pane
Allows switching between multiple pages
```py
```py linenums="1"
nb = ttk.Notebook(parent)
f1 = ttk.Frame(parent, ...) # child of notebook
f2 = ttk.Frame(parent, ...)
@ -555,7 +555,7 @@ nb.tab(tabid, option=value) # change tab option
Creation of personalized fonts
```py
```py linenums="1"
from tkinter import font
font_name = font.Font(family='font_family', size=num, weight='bold/normal', slant='roman/italic', underline=boolean, overstrike=boolean)
# FAMILY: Courier, Times, Helvetica (support guaranteed)
@ -573,7 +573,7 @@ label['image'] = imgobj
#### IMAGES W/ Pillow
```py
```py linenums="1"
from PIL import ImageTk, Image
myimg = ImageTk.PhotoImage(Image.open('filename'))
```