from typing import Union

from charset_normalizer import from_bytes
from charset_normalizer.constant import TOO_SMALL_SEQUENCE

# Encoding used whenever detection is skipped or yields no match.
UTF8 = 'utf-8'

# Raw payload types accepted by the helpers in this module.
ContentBytes = Union[bytearray, bytes]


def detect_encoding(content: ContentBytes) -> str:
    """Guess the text encoding of `content`, falling back to UTF-8.

    Content that is not longer than `TOO_SMALL_SEQUENCE` is never analysed:
    on text that short the detection can return a random encoding, leading
    to confusing results given the `charset_normalizer` version (< 2.0.5).
    UTF-8 is also returned when the detection produces no match.

    >>> too_short = ']"foo"'
    >>> detected = from_bytes(too_short.encode()).best().encoding
    >>> detected
    'ascii'
    >>> too_short.encode().decode(detected)
    ']"foo"'

    """
    # Guard clause: too little data to trust the detector.
    if len(content) <= TOO_SMALL_SEQUENCE:
        return UTF8
    # `from_bytes()` is handed an immutable copy so a `bytearray` works too.
    best_match = from_bytes(bytes(content)).best()
    return best_match.encoding if best_match else UTF8


def smart_decode(content: ContentBytes, encoding: str) -> str:
    """Return `content` decoded to text using `encoding`.

    A falsy `encoding` (for instance an empty string) is treated as
    "not provided"; the encoding is then guessed from `content` with
    `detect_encoding()` on a best-effort basis.

    Unicode errors are replaced.

    """
    codec = encoding or detect_encoding(content)
    return content.decode(codec, 'replace')


def smart_encode(content: str, encoding: str) -> bytes:
    """Return `content` encoded to bytes using `encoding`.

    Unicode errors are replaced.

    """
    encoded = content.encode(encoding, 'replace')
    return encoded
Name | Type | Size | Permission | Actions |
---|---|---|---|---|
__pycache__ | Folder | 0755 |
|
|
cli | Folder | 0755 |
|
|
output | Folder | 0755 |
|
|
plugins | Folder | 0755 |
|
|
__init__.py | File | 132 B | 0644 |
|
__main__.py | File | 394 B | 0644 |
|
client.py | File | 10.15 KB | 0644 |
|
compat.py | File | 1.82 KB | 0644 |
|
config.py | File | 3.42 KB | 0644 |
|
context.py | File | 3.83 KB | 0644 |
|
core.py | File | 8.74 KB | 0644 |
|
downloads.py | File | 13.85 KB | 0644 |
|
encoding.py | File | 1.32 KB | 0644 |
|
models.py | File | 3.37 KB | 0644 |
|
sessions.py | File | 4.85 KB | 0644 |
|
ssl.py | File | 1.84 KB | 0644 |
|
status.py | File | 987 B | 0644 |
|
uploads.py | File | 4.01 KB | 0644 |
|
utils.py | File | 6.12 KB | 0644 |
|