Upload of pre-existing files

This commit is contained in:
Marcello Lamonaca 2021-01-31 11:05:37 +01:00
commit 4c21152830
150 changed files with 730703 additions and 0 deletions

View file

@ -0,0 +1,167 @@
# [Beautiful Soup Library](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
## Making the Soup
```py
from bs4 import BeautifulSoup
import requests
import lxml # better html parser than built-in
response = requests.get("url") # retrieve a web page
soup = BeautifulSoup(response.text, "html.parser") # parse HTML from response w/ python default HTML parser
soup = BeautifulSoup(response.text, "lxml") # parse HTML from response w/ lxml parser
soup.prettify() # prettify parsed HTML for display
```
## Kinds of Objects
Beautiful Soup transforms a complex HTML document into a complex tree of Python objects.
### Tag
A Tag object corresponds to an XML or HTML tag in the original document
```py
soup = BeautifulSoup('<b class="boldest">Extremely bold</b>', 'html.parser') # parse HTML/XML
tag = soup.b
type(tag) # <class 'bs4.element.Tag'>
print(tag) # <b class="boldest">Extremely bold</b>
tag.name # tag name
tag["attribute"] # access tag attribute values
tag.attrs # dict of attribute-value pairs
```
### Navigable String
A string corresponds to a bit of text within a tag. Beautiful Soup uses the `NavigableString` class to contain these bits of text.
## Navigating the Tree
### Going Down
```py
soup.<tag>.<child_tag> # navigate using tag names
<tag>.contents # direct children as a list
<tag>.children # direct children as a generator for iteration
<tag>.descendants # iterator over all children, recursive
<tag>.string # tag contents, does not have further children
# If a tag's only child is another tag, and that tag has a .string, then the parent tag is considered to have the same .string as its child
# If a tag contains more than one thing, then it's not clear what .string should refer to, so .string is defined to be None
<tag>.strings # generator to iterate over all children's strings (will list white space)
<tag>.stripped_strings # generator to iterate over all children's strings (will NOT list white space)
```
### Going Up
```py
<tag>.parent # tag's direct parent (BeautifulSoup has parent None, html has parent BeautifulSoup)
<tag>.parents # iterable over all parents
```
### Going Sideways
```py
<tag>.previous_sibling
<tag>.next_sibling
<tag>.previous_siblings
<tag>.next_siblings
```
### Going Back and Forth
```py
<tag>.previous_element # whatever was parsed immediately before
<tag>.next_element # whatever was parsed immediately afterwards
<tag>.previous_elements # iterator over everything parsed before, moving backwards
<tag>.next_elements # iterator over everything parsed afterwards, moving forwards
```
## Searching the Tree
## Filter Types
```py
soup.find_all("tag") # by name
soup.find_all(["tag1", "tag2"]) # multiple tags in a list
soup.find_all(function) # based on a bool function
soup.find_all(True) # match every tag
```
## Methods
Method arguments:
- `name` (string): tag to search for
- `attrs` (dict): attribute-value pair to search for
- `string` (string): search by string contents rather than by tag
- `limit` (int): limit number of results
- `**kwargs`: each keyword argument is turned into a filter on one of a tag's attributes.
```py
find_all(name, attrs, recursive, string, limit, **kwargs) # several results
find(name, attrs, recursive, string, **kwargs) # one result
find_parents(name, attrs, string, limit, **kwargs) # several results
find_parent(name, attrs, string, **kwargs) # one result
find_next_siblings(name, attrs, string, limit, **kwargs) # several results
find_next_sibling(name, attrs, string, **kwargs) # one result
find_previous_siblings(name, attrs, string, limit, **kwargs) # several results
find_previous_sibling(name, attrs, string, **kwargs) # one result
find_all_next(name, attrs, string, limit, **kwargs) # several results
find_next(name, attrs, string, **kwargs) # one result
find_all_previous(name, attrs, string, limit, **kwargs) # several results
find_previous(name, attrs, string, **kwargs) # one result
soup("html_tag") # same as soup.find_all("html_tag")
soup.find("html_tag").text # text of the found tag
soup.select("css_selector") # search for CSS selectors of HTML tags
```
## Modifying the Tree
### Changing Tag Names and Attributes
```py
<tag>.name = "new_html_tag" # modify the tag type
<tag>["attribute"] = "value" # modify the attribute value
del <tag>["attribute"] # remove the attribute
soup.new_tag("name", <attribute> = "value") # create a new tag with specified name and attributes
<tag>.string = "new content" # modify tag text content
<tag>.append(item) # append to Tag content
<tag>.extend([item1, item2]) # add every element of the list in order
<tag>.insert(position: int, item) # like .insert in Python list
<tag>.insert_before(new_tag) # insert tags or strings immediately before something else in the parse tree
<tag>.insert_after(new_tag) # insert tags or strings immediately after something else in the parse tree
<tag>.clear() # remove all tag's contents
<tag>.extract() # extract and return the tag from the tree (operates on self)
<tag>.string.extract() # extract and return the string from the tree (operates on self)
<tag>.decompose() # remove a tag from the tree, then completely destroy it and its contents
<tag>.decomposed # check if tag has been decomposed
<tag>.replace_with(item) # remove a tag or string from the tree, and replaces it with the tag or string of choice
<tag>.wrap(other_tag) # wrap an element in the tag you specify, return the new wrapper
<tag>.unwrap() # replace a tag with whatever's inside, good for stripping out markup
<tag>.smooth() # clean up the parse tree by consolidating adjacent strings
```

View file

@ -0,0 +1,148 @@
# Flask
```python
from flask import Flask, render_template
app = Flask(__name__, template_folder="path_to_folder") # create app
# template folder contains html pages
@app.route("/") # define URLs
def index():
return render_template("index.html") # parse HTML page and return it
if __name__ == "__main__":
# run server if server is single file
app.run(debug=True, host="0.0.0.0")
```
`@app.route("/page/")` makes the page accessible at both `url/page` and `url/page/`. The same is possible using `app.add_url_rule("/", "page", function)`.
## Variable Rules
You can add variable sections to a URL by marking sections with `<variable_name>`.
Your function then receives the `<variable_name>` as a keyword argument.
Optionally, you can use a converter to specify the type of the argument like `<converter:variable_name>`.
Converter Type | Accepts
---------------|------------------------------
`string` | any text without a slash (default option)
`int` | positive integers
`float` | positive floating point values
`path` | strings with slashes
`uuid` | UUID strings
```python
@app.route("/user/<string:username>") # handle URL at runtime
def profile(username):
return f"{escape(username)}'s profile'"
```
## Redirection
`url_for(endpoint, **values)` builds the URL for an endpoint, passing keyword arguments to it; wrapped in `redirect()` it is used to redirect. It can be used in combination with `@app.route("/<value>")` to accept the passed arguments.
```py
from flask import Flask, redirect, url_for
@app.route("/url")
def func():
return redirect(url_for("html_file/function")) # redirect to other page
```
## Jinja Template Rendering (Parsing Python Code in HTML, CSS)
* `{% ... %}` for **Statements**
* `{{ ... }}` for **Expressions** to print to the template output
* `{# ... #}` for **Comments** not included in the template output
* `# ... ##` for **Line Statements**
Use `{% block block_code %}` to put a line of Python code inside HTML.
Use `{% end<block> %}` to end a block of code.
In `page.html`:
```html
<html>
{% for item in content %}
<p>{{item}}</p>
{% endfor %}
</html>
```
In `file.py`:
```py
@app.route("/page/")
def func():
return render_template("page.html", content=["A", "B", "C"])
```
### Hyperlinks
In `file.py`:
```py
@app.route('/linked_page/')
def cool_form():
return render_template('linked_page.html')
```
In `page.html`:
```html
<!doctype html>
<html>
<head>
</head>
<body>
<a href="{{ url_for('linked_page') }}">link text</a>
</body>
</html>
```
### CSS
Put `style.css` inside `/static/style`.
In `page.html`:
```html
<!doctype html>
<html>
<head>
</head>
<link rel="stylesheet" href="{{ url_for('static', filename='style/style.css') }}">
<body>
</body>
</html>
```
## Template Inheritance
In `parent_template.html`:
```html
<html>
<!-- html content -->
{% block block_name %}
{% endblock %}
<!-- html content -->
</html>
```
The content of the block will be filled by the child template.
In `child_template.html`:
```html
{% extends "parent_template.html" %}
{% block block_name %}
{{ super() }} <!-- use parent's contents -->
<!-- block content -->
{% endblock %}
```

View file

@ -0,0 +1,34 @@
# Flask Requests
Specify allowed HTTP methods in `file.py`:
`@app.route("/page/", methods=["allowed methods"])`
## Forms
in `file.py`:
```py
from flask import Flask, render_template
from flask.globals import request
@app.route("/login/", methods=["GET", "POST"])
def login():
if request.method == "POST": # if POST then form has been filled
data = request.form["field name"] # store the form's data in variable
# manipulate form data
req_args = request.args # request args
else: # if GET then is asking for form page
return render_template("login.html")
```
In `login.html`:
```html
<html>
<!-- action="#" goes to page itself but with # at the end of the URL -->
<form action="#" method="post">
<input type="text" name="field name">
</html>
```

146
Python/Libs/Web/requests.md Normal file
View file

@ -0,0 +1,146 @@
# Requests Lib
## GET REQUEST
Get or retrieve data from specified resource
```py
response = requests.get('URL') # returns response object
# PAYLOAD -> valuable information of response
response.status_code # http status code
```
The response message consists of:
- status line which includes the status code and reason message
- response header fields (e.g., Content-Type: text/html)
- empty line
- optional message body
```text
1xx -> INFORMATIONAL RESPONSE
2xx -> SUCCESS
200 OK -> request successful
3xx -> REDIRECTION
4xx -> CLIENT ERRORS
404 NOT FOUND -> resource not found
5xx -> SERVER ERRORS
```
```py
# raise exception HTTPError for error status codes
response.raise_for_status()
response.content # raw bytes of payload
response.encoding = 'utf-8' # specify encoding
response.text # string payload (serialized JSON)
response.json() # dict of payload
response.headers # response headers (dict)
```
### QUERY STRING PARAMETERS
```py
response = requests.get('URL', params={'q':'query'})
response = requests.get('URL', params=[('q', 'query')])
response = requests.get('URL', params=b'q=query')
```
### REQUEST HEADERS
```py
response = requests.get(
'URL',
params={'q': 'query'},
headers={'header': 'header_query'}
)
```
## OTHER HTTP METHODS
### DATA INPUT
```py
# requests that entity enclosed be stored as a new subordinate of the web resource identified by the URI
requests.post('URL', data={'key':'value'})
# requests that the enclosed entity be stored under the supplied URI
requests.put('URL', data={'key':'value'})
# applies partial modification
requests.patch('URL', data={'key':'value'})
# deletes specified resource
requests.delete('URL')
# ask for a response but without the response body (only headers)
requests.head('URL')
# returns supported HTTP methods of the server
requests.options('URL')
```
### SENDING JSON DATA
```py
requests.post('URL', json={'key': 'value'})
```
### INSPECTING THE REQUEST
```py
# requests lib prepares the request before sending it
response = requests.post('URL', data={'key':'value'})
response.request.something # inspect request field
```
## AUTHENTICATION
```py
requests.get('URL', auth=('username', 'password')) # use implicit HTTP Basic Authorization
# explicit HTTP Basic Authorization and other
from requests.auth import HTTPBasicAuth, HTTPDigestAuth, HTTPProxyAuth
from getpass import getpass
requests.get('URL', auth=HTTPBasicAuth('username', getpass()))
```
### PERSONALIZED AUTH
```py
from requests.auth import AuthBase
class TokenAuth(AuthBase):
"custom authentication scheme"
def __init__(self, token):
self.token = token
def __call__(self, r):
"""Attach API token to custom auth"""
r.headers['X-TokenAuth'] = f'{self.token}'
return r
requests.get('URL', auth=TokenAuth('1234abcde-token'))
```
### DISABLING SSL VERIFICATION
```py
requests.get('URL', verify=False)
```
## PERFORMANCE
### REQUEST TIMEOUT
```py
# raise Timeout exception if request times out
requests.get('URL', timeout=(connection_timeout, read_timeout))
```
### MAX RETRIES
```py
from requests.adapters import HTTPAdapter
URL_adapter = HTTPAdapter(max_retries = int)
session = requests.Session()
# use URL_adapter for all requests to URL
session.mount('URL', URL_adapter)
```