mirror of
https://github.com/m-lamonaca/dev-notes.git
synced 2025-06-08 10:47:13 +00:00
Upload of pre-existing files
This commit is contained in:
commit
4c21152830
150 changed files with 730703 additions and 0 deletions
167
Python/Libs/Web/BeautifulSoup.md
Normal file
167
Python/Libs/Web/BeautifulSoup.md
Normal file
|
@ -0,0 +1,167 @@
|
|||
# [Beautiful Soup Library](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
|
||||
|
||||
## Making the Soup
|
||||
|
||||
```py
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import lxml # better html parser than built-in
|
||||
|
||||
response = requests.get("url")  # retrieve a web page
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser") # parse HTML from response w/ python default HTML parser
|
||||
soup = BeautifulSoup(response.text, "lxml") # parse HTML from response w/ lxml parser
|
||||
|
||||
soup.prettify() # prettify parsed HTML for display
|
||||
```
|
||||
|
||||
## Kinds of Objects
|
||||
|
||||
Beautiful Soup transforms a complex HTML document into a complex tree of Python objects.
|
||||
|
||||
### Tag
|
||||
|
||||
A Tag object corresponds to an XML or HTML tag in the original document
|
||||
|
||||
```py
|
||||
soup = BeautifulSoup('<b class="boldest">Extremely bold</b>', 'html.parser') # parse HTML/XML
|
||||
|
||||
tag = soup.b
|
||||
type(tag) # <class 'bs4.element.Tag'>
|
||||
print(tag) # <b class="boldest">Extremely bold</b>
|
||||
|
||||
tag.name # tag name
|
||||
tag["attribute"]  # access tag attribute values
|
||||
tag.attrs  # dict of attribute-value pairs
|
||||
```
|
||||
|
||||
### Navigable String
|
||||
|
||||
A string corresponds to a bit of text within a tag. Beautiful Soup uses the `NavigableString` class to contain these bits of text.
|
||||
|
||||
## Navigating the Tree
|
||||
|
||||
### Going Down
|
||||
|
||||
```py
|
||||
soup.<tag>.<child_tag> # navigate using tag names
|
||||
|
||||
<tag>.contents # direct children as a list
|
||||
<tag>.children  # direct children as a generator for iteration
|
||||
<tag>.descendants  # iterator over all children, recursive
|
||||
|
||||
<tag>.string # tag contents, does not have further children
|
||||
# If a tag’s only child is another tag, and that tag has a .string, then the parent tag is considered to have the same .string as its child
|
||||
# If a tag contains more than one thing, then it’s not clear what .string should refer to, so .string is defined to be None
|
||||
|
||||
<tag>.strings  # generator to iterate over all children's strings (will list white space)
|
||||
<tag>.stripped_strings  # generator to iterate over all children's strings (will NOT list white space)
|
||||
```
|
||||
|
||||
### Going Up
|
||||
|
||||
```py
|
||||
<tag>.parent  # tag's direct parent (BeautifulSoup has parent None, html has parent BeautifulSoup)
|
||||
<tag>.parents # iterable over all parents
|
||||
```
|
||||
|
||||
### Going Sideways
|
||||
|
||||
```py
|
||||
<tag>.previous_sibling
|
||||
<tag>.next_sibling
|
||||
|
||||
<tag>.previous_siblings
|
||||
<tag>.next_siblings
|
||||
```
|
||||
|
||||
### Going Back and Forth
|
||||
|
||||
```py
|
||||
<tag>.previous_element # whatever was parsed immediately before
|
||||
<tag>.next_element # whatever was parsed immediately afterwards
|
||||
|
||||
<tag>.previous_elements  # iterator over everything that was parsed before this element
|
||||
<tag>.next_elements  # iterator over everything that was parsed after this element
|
||||
```
|
||||
|
||||
## Searching the Tree
|
||||
|
||||
## Filter Types
|
||||
|
||||
```py
|
||||
soup.find_all("tag") # by name
|
||||
soup.find_all(["tag1", "tag2"]) # multiple tags in a list
|
||||
soup.find_all(function) # based on a bool function
|
||||
soup.find_all(True)  # match everything
|
||||
```
|
||||
|
||||
## Methods
|
||||
|
||||
Methods arguments:
|
||||
|
||||
- `name` (string): tag to search for
|
||||
- `attrs` (dict): attribute-value pair to search for
|
||||
- `string` (string): search by string contents rather than by tag
|
||||
- `limit` (int): limit number of results
|
||||
- `**kwargs`: each keyword argument is turned into a filter on one of a tag’s attributes.
|
||||
|
||||
```py
|
||||
find_all(name, attrs, recursive, string, limit, **kwargs) # several results
|
||||
find(name, attrs, recursive, string, **kwargs) # one result
|
||||
|
||||
find_parents(name, attrs, string, limit, **kwargs) # several results
|
||||
find_parent(name, attrs, string, **kwargs) # one result
|
||||
|
||||
find_next_siblings(name, attrs, string, limit, **kwargs) # several results
|
||||
find_next_sibling(name, attrs, string, **kwargs) # one result
|
||||
|
||||
find_previous_siblings(name, attrs, string, limit, **kwargs) # several results
|
||||
find_previous_sibling(name, attrs, string, **kwargs) # one result
|
||||
|
||||
find_all_next(name, attrs, string, limit, **kwargs) # several results
|
||||
find_next(name, attrs, string, **kwargs) # one result
|
||||
|
||||
find_all_previous(name, attrs, string, limit, **kwargs) # several results
|
||||
find_previous(name, attrs, string, **kwargs) # one result
|
||||
|
||||
soup("html_tag") # same as soup.find_all("html_tag")
|
||||
soup.find("html_tag").text # text of the found tag
|
||||
soup.select("css_selector") # search for CSS selectors of HTML tags
|
||||
```
|
||||
|
||||
## Modifying the Tree
|
||||
|
||||
### Changing Tag Names and Attributes
|
||||
|
||||
```py
|
||||
<tag>.name = "new_html_tag" # modify the tag type
|
||||
<tag>["attribute"] = "value" # modify the attribute value
|
||||
del <tag>["attribute"] # remove the attribute
|
||||
|
||||
soup.new_tag("name", <attribute> = "value")  # create a new tag with specified name and attributes
|
||||
|
||||
<tag>.string = "new content" # modify tag text content
|
||||
<tag>.append(item) # append to Tag content
|
||||
<tag>.extend([item1, item2]) # add every element of the list in order
|
||||
|
||||
<tag>.insert(position: int, item) # like .insert in Python list
|
||||
|
||||
<tag>.insert_before(new_tag) # insert tags or strings immediately before something else in the parse tree
|
||||
<tag>.insert_after(new_tag)  # insert tags or strings immediately after something else in the parse tree
|
||||
|
||||
<tag>.clear() # remove all tag's contents
|
||||
|
||||
<tag>.extract() # extract and return the tag from the tree (operates on self)
|
||||
<tag>.string.extract() # extract and return the string from the tree (operates on self)
|
||||
<tag>.decompose() # remove a tag from the tree, then completely destroy it and its contents
|
||||
<tag>.decomposed  # check if tag has been decomposed
|
||||
|
||||
<tag>.replace_with(item) # remove a tag or string from the tree, and replaces it with the tag or string of choice
|
||||
|
||||
<tag>.wrap(other_tag) # wrap an element in the tag you specify, return the new wrapper
|
||||
<tag>.unwrap() # replace a tag with whatever’s inside, good for stripping out markup
|
||||
|
||||
<tag>.smooth() # clean up the parse tree by consolidating adjacent strings
|
||||
```
|
148
Python/Libs/Web/Flask/Flask.md
Normal file
148
Python/Libs/Web/Flask/Flask.md
Normal file
|
@ -0,0 +1,148 @@
|
|||
# Flask
|
||||
|
||||
```python
|
||||
from flask import Flask, render_template
|
||||
|
||||
app = Flask(__name__, template_folder="path_to_folder") # create app
|
||||
|
||||
# template folder contains html pages
|
||||
@app.route("/") # define URLs
|
||||
def index():
|
||||
|
||||
return render_template("index.html") # parse HTML page and return it
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# run server if server is single file
|
||||
app.run(debug=True, host="0.0.0.0")
|
||||
```
|
||||
|
||||
`@app.route("/page/")` makes the page accessible at both `url/page` and `url/page/`. The same is possible using `app.add_url_rule("/", "page", function)`.
|
||||
|
||||
## Variable Rules
|
||||
|
||||
You can add variable sections to a URL by marking sections with `<variable_name>`.
|
||||
Your function then receives the `<variable_name>` as a keyword argument.
|
||||
Optionally, you can use a converter to specify the type of the argument like `<converter:variable_name>`.
|
||||
|
||||
Converter Type | Accepts
|
||||
---------------|------------------------------
|
||||
`string` | any text without a slash (default option)
|
||||
`int` | positive integers
|
||||
`float` | positive floating point values
|
||||
`path` | strings with slashes
|
||||
`uuid` | UUID strings
|
||||
|
||||
```python
|
||||
@app.route("/user/<string:username>")  # handle URL at runtime
|
||||
def profile(username):
|
||||
return f"{escape(username)}'s profile"
|
||||
```
|
||||
|
||||
## Redirection
|
||||
|
||||
`url_for(endpoint, **values)` builds the URL for an endpoint, passing keyword arguments as URL values; combined with `redirect()` it is used to redirect to another page. It can be used in combination with `@app.route("/<value>")` to accept the passed arguments.
|
||||
|
||||
```py
|
||||
from flask import Flask, redirect, url_for
|
||||
|
||||
@app.route("/url")
|
||||
def func():
|
||||
|
||||
return redirect(url_for("html_file/function")) # redirect to other page
|
||||
```
|
||||
|
||||
## Jinja Template Rendering (Parsing Python Code in HTML, CSS)
|
||||
|
||||
* `{% ... %}` for **Statements**
|
||||
* `{{ ... }}` for **Expressions** to print to the template output
|
||||
* `{# ... #}` for **Comments** not included in the template output
|
||||
* `# ... ##` for **Line Statements**
|
||||
|
||||
Use `{% block block_code %}` to put a line of Python code inside HTML.
|
||||
Use `{% end<block> %}` to end a block of code.
|
||||
|
||||
In `page.html`:
|
||||
|
||||
```html
|
||||
<html>
|
||||
{% for item in content %}
|
||||
<p>{{item}}</p>
|
||||
{% endfor %}
|
||||
</html>
|
||||
```
|
||||
|
||||
In `file.py`:
|
||||
|
||||
```py
|
||||
@app.route("/page/")
|
||||
def func():
|
||||
return render_template("page.html", content=["A", "B", "C"])
|
||||
```
|
||||
|
||||
### Hyperlinks
|
||||
|
||||
In `file.py`:
|
||||
|
||||
```py
|
||||
@app.route('/linked_page/')
|
||||
def cool_form():
|
||||
return render_template('linked_page.html')
|
||||
```
|
||||
|
||||
In `page.html`:
|
||||
|
||||
```html
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<a href="{{ url_for('cool_form') }}">link text</a>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
### CSS
|
||||
|
||||
Put `style.css` inside `/static/style`.
|
||||
|
||||
In `page.html`:
|
||||
|
||||
```html
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<link rel="stylesheet" href="{{ url_for('static', filename='style/style.css') }}">
|
||||
<body>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
## Template Inheritance
|
||||
|
||||
In `parent_template.html`:
|
||||
|
||||
```html
|
||||
<html>
|
||||
<!-- html content -->
|
||||
{% block block_name %}
|
||||
{% endblock %}
|
||||
<!-- html content -->
|
||||
</html>
|
||||
```
|
||||
|
||||
The content of the block will be filled by the child template.
|
||||
|
||||
In `child_template.html`:
|
||||
|
||||
```html
|
||||
{% extends "parent_template.html" %}
|
||||
{% block block_name %}
|
||||
{{ super() }} <!-- use parent's contents -->
|
||||
<!-- block content -->
|
||||
{% endblock %}
|
||||
```
|
34
Python/Libs/Web/Flask/requests.md
Normal file
34
Python/Libs/Web/Flask/requests.md
Normal file
|
@ -0,0 +1,34 @@
|
|||
# Flask Requests
|
||||
|
||||
Specify allowed HTTP methods in `file.py`:
|
||||
`@app.route("/page/", methods=["allowed methods"])`
|
||||
|
||||
## Forms
|
||||
|
||||
In `file.py`:
|
||||
|
||||
```py
|
||||
from flask import Flask, render_template
|
||||
from flask.globals import request
|
||||
|
||||
@app.route("/login/", methods=["GET", "POST"])
|
||||
def login():
|
||||
if request.method == "POST": # if POST then form has been filled
|
||||
data = request.form["field name"] # store the form's data in variable
|
||||
# manipulate form data
|
||||
|
||||
req_args = request.args # request args
|
||||
|
||||
else: # if GET then is asking for form page
|
||||
return render_template("login.html")
|
||||
```
|
||||
|
||||
In `login.html`:
|
||||
|
||||
```html
|
||||
<html>
|
||||
<!-- action="#" goes to page itself but with # at the end of the URL -->
|
||||
<form action="#" method="post">
|
||||
<input type="text" name="field name">
|
||||
</html>
|
||||
```
|
146
Python/Libs/Web/requests.md
Normal file
146
Python/Libs/Web/requests.md
Normal file
|
@ -0,0 +1,146 @@
|
|||
# Requests Lib
|
||||
|
||||
## GET REQUEST
|
||||
|
||||
Get or retrieve data from specified resource
|
||||
|
||||
```py
|
||||
response = requests.get('URL') # returns response object
|
||||
|
||||
# PAYLOAD -> valuable information of response
|
||||
response.status_code # http status code
|
||||
```
|
||||
|
||||
The response message consists of:
|
||||
|
||||
- status line which includes the status code and reason message
|
||||
- response header fields (e.g., Content-Type: text/html)
|
||||
- empty line
|
||||
- optional message body
|
||||
|
||||
```text
|
||||
1xx -> INFORMATIONAL RESPONSE
|
||||
2xx -> SUCCESS
|
||||
200 OK -> request successful
|
||||
3xx -> REDIRECTION
|
||||
4xx -> CLIENT ERRORS
|
||||
404 NOT FOUND -> resource not found
|
||||
5xx -> SERVER ERRORS
|
||||
```
|
||||
|
||||
```py
|
||||
# raise exception HTTPError for error status codes
|
||||
response.raise_for_status()
|
||||
|
||||
response.content # raw bytes of payload
|
||||
response.encoding = 'utf-8' # specify encoding
|
||||
response.text # string payload (serialized JSON)
|
||||
response.json() # dict of payload
|
||||
response.headers # response headers (dict)
|
||||
```
|
||||
|
||||
### QUERY STRING PARAMETERS
|
||||
|
||||
```py
|
||||
response = requests.get('URL', params={'q':'query'})
|
||||
response = requests.get('URL', params=[('q', 'query')])
|
||||
response = requests.get('URL', params=b'q=query')
|
||||
```
|
||||
|
||||
### REQUEST HEADERS
|
||||
|
||||
```py
|
||||
response = requests.get(
|
||||
'URL',
|
||||
params={'q': 'query'},
|
||||
headers={'header': 'header_query'}
|
||||
)
|
||||
```
|
||||
|
||||
## OTHER HTTP METHODS
|
||||
|
||||
### DATA INPUT
|
||||
|
||||
```py
|
||||
# requests that entity enclosed be stored as a new subordinate of the web resource identified by the URI
|
||||
requests.post('URL', data={'key':'value'})
|
||||
# requests that the enclosed entity be stored under the supplied URI
|
||||
requests.put('URL', data={'key':'value'})
|
||||
# applies partial modification
|
||||
requests.patch('URL', data={'key':'value'})
|
||||
# deletes specified resource
|
||||
requests.delete('URL')
|
||||
# ask for a response but without the response body (only headers)
|
||||
requests.head('URL')
|
||||
# returns supported HTTP methods of the server
|
||||
requests.options('URL')
|
||||
```
|
||||
|
||||
### SENDING JSON DATA
|
||||
|
||||
```py
|
||||
requests.post('URL', json={'key': 'value'})
|
||||
```
|
||||
|
||||
### INSPECTING THE REQUEST
|
||||
|
||||
```py
|
||||
# requests lib prepares the request before sending it
|
||||
response = requests.post('URL', data={'key':'value'})
|
||||
response.request.something # inspect request field
|
||||
```
|
||||
|
||||
## AUTHENTICATION
|
||||
|
||||
```py
|
||||
requests.get('URL', auth=('username', 'password'))  # use implicit HTTP Basic Authorization
|
||||
|
||||
# explicit HTTP Basic Authorization and other
|
||||
from requests.auth import HTTPBasicAuth, HTTPDigestAuth, HTTPProxyAuth
|
||||
from getpass import getpass
|
||||
requests.get('URL', auth=HTTPBasicAuth('username', getpass()))
|
||||
```
|
||||
|
||||
### PERSONALIZED AUTH
|
||||
|
||||
```py
|
||||
from requests.auth import AuthBase
|
||||
class TokenAuth(AuthBase):
|
||||
"custom authentication scheme"
|
||||
|
||||
def __init__(self, token):
|
||||
self.token = token
|
||||
|
||||
def __call__(self, r):
|
||||
"""Attach API token to custom auth"""
|
||||
r.headers['X-TokenAuth'] = f'{self.token}'
|
||||
return r
|
||||
|
||||
requests.get('URL', auth=TokenAuth('1234abcde-token'))
|
||||
```
|
||||
|
||||
### DISABLING SSL VERIFICATION
|
||||
|
||||
```py
|
||||
requests.get('URL', verify=False)
|
||||
```
|
||||
|
||||
## PERFORMANCE
|
||||
|
||||
### REQUEST TIMEOUT
|
||||
|
||||
```py
|
||||
# raise Timeout exception if request times out
|
||||
requests.get('URL', timeout=(connection_timeout, read_timeout))
|
||||
```
|
||||
|
||||
### MAX RETRIES
|
||||
|
||||
```py
|
||||
from requests.adapters import HTTPAdapter
|
||||
URL_adapter = HTTPAdapter(max_retries = int)
|
||||
session = requests.Session()
|
||||
|
||||
# use URL_adapter for all requests to URL
|
||||
session.mount('URL', URL_adapter)
|
||||
```
|
Loading…
Add table
Add a link
Reference in a new issue