Module bases.encoding.fixed_char
Expand source code
import binascii
import math
from typing import Any, Dict, List, Mapping, Optional, Union
from typing_extensions import Literal
from bases.alphabet import Alphabet
from .base import BaseEncoding
from .errors import DecodingError, InvalidDigitError, PaddingError
# Padding behaviour accepted by `FixedCharBaseEncoding`:
# - "ignore":  encoded strings are not padded;
# - "include": encoded strings are padded with the padding character;
# - "require": as "include", and `strip_string` additionally rejects strings
#              which do not carry exactly the expected amount of padding.
PaddingOptions = Literal["ignore", "include", "require"]
def _lcm(a: int, b: int) -> int:
    """
    Returns the least common multiple of `a` and `b`, as a non-negative integer.

    Hand-rolled because `math.lcm` is only available in Python 3.9+.
    Follows the conventions of `math.lcm`: the result is 0 if either argument is 0.
    """
    if a == 0 or b == 0:
        # math.gcd(0, 0) == 0, so the division below would raise ZeroDivisionError
        return 0
    return abs(a*b)//math.gcd(a, b)
class FixedCharBaseEncoding(BaseEncoding):
    """
    Base encoding in which every character encodes the same, fixed number of bits.

    Bytes are read as a big-endian unsigned integer, right-padded with zero bits to a whole
    number of characters and split into digits of `char_nbits` bits, each mapped to a character
    of the alphabet. Encoded strings can optionally be padded with `pad_char` to an integral
    multiple of `block_nchars` characters.

    Constructor arguments:

    - `alphabet` and `case_sensitive` are passed to `BaseEncoding.__init__`;
    - `char_nbits` is the number of bits per character, or `"auto"` for `ceil(log2(base))`;
    - `pad_char` is an optional padding character (length 1, not in the alphabet);
    - `padding` is one of `"ignore"` (encoded strings are not padded), `"include"` (encoded
      strings are padded) or `"require"` (as `"include"`, and `strip_string` raises
      `PaddingError` unless exactly the expected padding is present). Options other than
      `"ignore"` require a `pad_char`.
    """

    _char_nbits: int
    _init_char_nbits: Union[int, Literal["auto"]]
    _pad_char: Optional[str] = None
    _padding: PaddingOptions = "ignore"
    _block_nbytes: int
    _block_nchars: int

    def __init__(self, alphabet: Union[str, range, Alphabet], *,
                 case_sensitive: Optional[bool] = None,
                 char_nbits: Union[int, Literal["auto"]] = "auto",
                 pad_char: Optional[str] = None,
                 padding: PaddingOptions = "ignore"):
        if padding not in ("ignore", "include", "require"):
            raise TypeError("Allowed padding options are: 'ignore', 'include' and 'require'.")
        super().__init__(alphabet, case_sensitive=case_sensitive)
        # keep the argument as passed, so that `options` can report "auto"
        self._init_char_nbits = char_nbits
        if char_nbits == "auto":
            # smallest number of bits such that 2**char_nbits >= base
            char_nbits = int(math.ceil(math.log2(self.base)))
        self._char_nbits = char_nbits
        self._pad_char = pad_char
        self._padding = padding
        self.__validate_init()
        # a block is the shortest run of characters covering a whole number of bytes
        block_nbits = _lcm(char_nbits, 8)
        self._block_nbytes = block_nbits//8
        self._block_nchars = block_nbits//char_nbits

    def __validate_init(self) -> None:
        """
        Validates the padding character and the number of bits per character.

        Raises `ValueError` if padding is not `"ignore"` but no padding character is given,
        if the padding character does not have length 1 or is in the alphabet, if the number
        of bits per character is not positive, or if it is too small to cover the alphabet.
        """
        alphabet = self.alphabet
        pad_char = self.pad_char
        if pad_char is None:
            if self.padding != "ignore":
                raise ValueError("If padding is not 'ignore', a padding character must be specified.")
        else:
            if len(pad_char) != 1:
                raise ValueError("If specified, padding character must have length 1.")
            if pad_char in alphabet:
                raise ValueError("Padding character cannot be in the alphabet.")
        char_nbits = self.char_nbits
        if char_nbits is not None:
            if char_nbits <= 0:
                raise ValueError("If specified, number of bits per character must be positive.")
            if 2**char_nbits < self.base:
                raise ValueError(f"Number of bits per character is insufficient to cover the whole alphabet. This is likely a mistake. "
                                 f"If it isn't, please truncate the alphabet to {2**char_nbits} characters (or less).")

    @property
    def char_nbits(self) -> int:
        """
        Number of bits encoded by each character.
        """
        return self._char_nbits

    @property
    def block_nchars(self) -> int:
        """
        Number of characters in a block: `lcm(char_nbits, 8)//char_nbits`.
        Padded strings have length an integral multiple of this number.
        """
        return self._block_nchars

    @property
    def effective_base(self) -> int:
        """
        Number of distinct digits representable by one character: `2**char_nbits`.
        This is at least as large as the number of characters in the alphabet.
        """
        effective_base: int = 2**self.char_nbits
        return effective_base

    @property
    def padding(self) -> PaddingOptions:
        """
        The padding option: one of `"ignore"`, `"include"` or `"require"`.
        """
        return self._padding

    @property
    def include_padding(self) -> bool:
        """
        Whether padding is included in encoded strings (padding is `"include"` or `"require"`).
        """
        return self.padding in ("include", "require")

    @property
    def require_padding(self) -> bool:
        """
        Whether `strip_string` checks that the correct amount of padding is present
        (padding is `"require"`).
        """
        return self.padding == "require"

    @property
    def pad_char(self) -> Optional[str]:
        """
        An optional character to be used for padding of encoded strings.
        In [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html), this is `"="` for both base64 and base32,
        but it is `None` for base16 (where no padding of encoded strings is ever required).
        """
        return self._pad_char

    def pad(self, require: bool = False) -> "FixedCharBaseEncoding":
        """
        Calls `with_options` with padding set to `"require"` if `require` is true,
        and to `"include"` otherwise, and returns the result.
        """
        # `padding` is the option accepted by `__init__` and reported by `options`
        return self.with_options(padding="require" if require else "include")

    def nopad(self, allow: bool = True) -> "FixedCharBaseEncoding":
        """
        Calls `with_options` with padding set to `"ignore"` and returns the result.
        The current padding character is kept if `allow` is true, and set to `None` otherwise.
        """
        return self.with_options(padding="ignore", pad_char=self.pad_char if allow else None)

    def with_pad_char(self, pad_char: Optional[str]) -> "FixedCharBaseEncoding":
        """
        Calls `with_options` with the given padding character and returns the result.
        If `pad_char` is `None`, padding is also set to `"ignore"`, because the other
        padding options require a padding character.
        """
        options: Dict[str, Any] = dict(pad_char=pad_char)
        if pad_char is None:
            options["padding"] = "ignore"
        return self.with_options(**options)

    def pad_string(self, s: str) -> str:
        """
        If no padding character is specified for this encoding, returns the input string unchanged.
        If a padding character is specified for this encoding, pads the input string by appending the
        minimum number of padding characters necessary to make its length an integral multiple of the
        block char size (given by `FixedCharBaseEncoding.block_nchars`).

        Raises `TypeError` if `s` is not a string.
        """
        if not isinstance(s, str):
            raise TypeError(f"Expected str, found {type(s).__name__}.")
        pad_char = self.pad_char
        block_nchars = self._block_nchars
        # no padding available for this encoding scheme
        if pad_char is None:
            return s
        # padding available, but no need for padding
        if len(s)%block_nchars == 0:
            return s
        # compute required padding length
        pad_len = block_nchars-(len(s)%block_nchars)
        # return padded string
        return s+pad_char*pad_len

    def strip_string(self, s: str) -> str:
        """
        If no padding character is specified for this encoding, returns the input string unchanged.
        Otherwise, returns the input string with all trailing padding characters removed
        (lowercase and uppercase variants included, if the encoding is not case-sensitive).

        Raises `TypeError` if `s` is not a string.
        If padding is `"require"`, raises `PaddingError` unless the number of padding characters
        removed is exactly the number needed to complete the last block.
        """
        if not isinstance(s, str):
            raise TypeError(f"Expected str, found {type(s).__name__}.")
        pad_char = self.pad_char
        case_sensitive = self.case_sensitive
        block_nchars = self._block_nchars
        # no padding available for this encoding scheme
        if pad_char is None:
            return s
        # padding character(s) to strip from the right of the string
        pad_chars = pad_char
        if not case_sensitive:
            pad_chars += pad_char.lower()+pad_char.upper()
        # strip padding from string
        s_stripped = s.rstrip(pad_chars)
        # if padding is required on decoding, check the correct amount was included
        if self.require_padding:
            padding = len(s)-len(s_stripped)
            extra_nchars = len(s_stripped)%block_nchars
            expected_padding = 0 if extra_nchars == 0 else block_nchars-extra_nchars
            if padding != expected_padding:
                raise PaddingError(padding, expected_padding)
        return s_stripped

    def canonical_bytes(self, b: bytes) -> bytes:
        """
        Validates the bytes (using `_validate_bytes`) and returns them unchanged.
        """
        self._validate_bytes(b)
        return b

    def canonical_string(self, s: str) -> str:
        """
        Returns the string padded (using `pad_string`) if padding is included by this encoding,
        and stripped of padding (using `strip_string`) otherwise.
        """
        if self.include_padding:
            return self.pad_string(s)
        return self.strip_string(s)

    def _validate_string(self, s: str) -> str:
        # padding is removed before the string is validated by the parent class
        s = self.strip_string(s)
        return super()._validate_string(s)

    def _encode(self, b: bytes) -> str:
        """
        Encodes bytes into a string, padded if padding is included by this encoding.

        Raises `InvalidDigitError` if a digit has no corresponding character, which can
        only happen when the alphabet has fewer than `2**char_nbits` characters.
        """
        alphabet = self.alphabet
        base = self.base
        char_nbits = self.char_nbits
        effective_base = self.effective_base
        # bytes as unsigned integer
        i = int.from_bytes(b, byteorder="big")
        # add padding bits (align to integral number of characters)
        nchars, extra_nbits = divmod((8*len(b)), char_nbits)
        if extra_nbits > 0:
            i <<= char_nbits-extra_nbits # pad bits set to 0
            nchars += 1
        # compute characters in reverse order
        revchars: List[str] = []
        for _ in range(nchars):
            # extract next digit by consuming rightmost char_nbits
            # Same as: d = i % (2**char_nbits); i >>= char_nbits
            i, d = divmod(i, effective_base)
            # ensure digit is valid for actual base (number of characters in the alphabet)
            if not d < base:
                raise InvalidDigitError(d, base)
            # add the next character to the list
            revchars.append(alphabet[d])
        # join characters, pad string (if padding is to be included) and return
        s = "".join(reversed(revchars))
        if not self.include_padding:
            return s
        return self.pad_string(s)

    def _decode(self, s: str) -> bytes:
        """
        Decodes a string (already stripped of padding characters) into bytes.

        Raises `DecodingError` if the string length implies more pad bits than the
        bits in a single character, or if the pad bits are not all zero.
        """
        char_nbits = self.char_nbits
        alphabet_revdir = self.alphabet.revdir
        # decode string into unsigned integer: every character contributes exactly
        # char_nbits bits, mirroring the digit extraction modulo 2**char_nbits in _encode
        # (the alphabet size must not be used here, as it can be less than 2**char_nbits)
        i = 0
        for c in s:
            i = (i << char_nbits) + alphabet_revdir[c]
        # remove padding bits (ensure that there are not too many and that they are all set to zero)
        original_nbytes, extra_nbits = divmod((char_nbits*len(s)), 8)
        if extra_nbits >= char_nbits:
            raise DecodingError(f"More pad bits found ({extra_nbits}) than bits per character ({char_nbits}).")
        if extra_nbits > 0:
            i, pad_bits = divmod(i, 2**extra_nbits)
            if pad_bits != 0:
                raise DecodingError("Pad bits must be zero.")
        # convert unsigned integer into the required number of bytes (zero-padded to the left)
        return i.to_bytes(length=original_nbytes, byteorder="big")

    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        """
        Returns the options `char_nbits` (as passed to the constructor), `pad_char` and `padding`.
        If `skip_defaults` is true, options with default value (`"auto"`, `None` and `"ignore"`
        respectively) are omitted.
        """
        options: Dict[str, Any] = {}
        if not skip_defaults or self._init_char_nbits != "auto":
            options["char_nbits"] = self._init_char_nbits
        if not skip_defaults or self.pad_char is not None:
            options["pad_char"] = self.pad_char
        if not skip_defaults or self.padding != "ignore":
            options["padding"] = self.padding
        return options
Classes
class FixedCharBaseEncoding (alphabet: Union[str, range, Alphabet], *, case_sensitive: Optional[bool] = None, char_nbits: Union[int, Literal['auto']] = 'auto', pad_char: Optional[str] = None, padding: Literal['ignore', 'include', 'require'] = 'ignore')
-
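Base encoding in which every character encodes a fixed number of bits (`char_nbits`), with optional padding of encoded strings to a whole number of blocks of `block_nchars` characters. Derives from `BaseEncoding`, which is an abstract base class (`abc.ABC`).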
Expand source code
class FixedCharBaseEncoding(BaseEncoding):
    """
    Base encoding in which every character encodes the same, fixed number of bits.

    Bytes are read as a big-endian unsigned integer, right-padded with zero bits to a whole
    number of characters and split into digits of `char_nbits` bits, each mapped to a character
    of the alphabet. Encoded strings can optionally be padded with `pad_char` to an integral
    multiple of `block_nchars` characters.

    Constructor arguments:

    - `alphabet` and `case_sensitive` are passed to `BaseEncoding.__init__`;
    - `char_nbits` is the number of bits per character, or `"auto"` for `ceil(log2(base))`;
    - `pad_char` is an optional padding character (length 1, not in the alphabet);
    - `padding` is one of `"ignore"` (encoded strings are not padded), `"include"` (encoded
      strings are padded) or `"require"` (as `"include"`, and `strip_string` raises
      `PaddingError` unless exactly the expected padding is present). Options other than
      `"ignore"` require a `pad_char`.
    """

    _char_nbits: int
    _init_char_nbits: Union[int, Literal["auto"]]
    _pad_char: Optional[str] = None
    _padding: PaddingOptions = "ignore"
    _block_nbytes: int
    _block_nchars: int

    def __init__(self, alphabet: Union[str, range, Alphabet], *,
                 case_sensitive: Optional[bool] = None,
                 char_nbits: Union[int, Literal["auto"]] = "auto",
                 pad_char: Optional[str] = None,
                 padding: PaddingOptions = "ignore"):
        if padding not in ("ignore", "include", "require"):
            raise TypeError("Allowed padding options are: 'ignore', 'include' and 'require'.")
        super().__init__(alphabet, case_sensitive=case_sensitive)
        # keep the argument as passed, so that `options` can report "auto"
        self._init_char_nbits = char_nbits
        if char_nbits == "auto":
            # smallest number of bits such that 2**char_nbits >= base
            char_nbits = int(math.ceil(math.log2(self.base)))
        self._char_nbits = char_nbits
        self._pad_char = pad_char
        self._padding = padding
        self.__validate_init()
        # a block is the shortest run of characters covering a whole number of bytes
        block_nbits = _lcm(char_nbits, 8)
        self._block_nbytes = block_nbits//8
        self._block_nchars = block_nbits//char_nbits

    def __validate_init(self) -> None:
        """
        Validates the padding character and the number of bits per character.

        Raises `ValueError` if padding is not `"ignore"` but no padding character is given,
        if the padding character does not have length 1 or is in the alphabet, if the number
        of bits per character is not positive, or if it is too small to cover the alphabet.
        """
        alphabet = self.alphabet
        pad_char = self.pad_char
        if pad_char is None:
            if self.padding != "ignore":
                raise ValueError("If padding is not 'ignore', a padding character must be specified.")
        else:
            if len(pad_char) != 1:
                raise ValueError("If specified, padding character must have length 1.")
            if pad_char in alphabet:
                raise ValueError("Padding character cannot be in the alphabet.")
        char_nbits = self.char_nbits
        if char_nbits is not None:
            if char_nbits <= 0:
                raise ValueError("If specified, number of bits per character must be positive.")
            if 2**char_nbits < self.base:
                raise ValueError(f"Number of bits per character is insufficient to cover the whole alphabet. This is likely a mistake. "
                                 f"If it isn't, please truncate the alphabet to {2**char_nbits} characters (or less).")

    @property
    def char_nbits(self) -> int:
        """
        Number of bits encoded by each character.
        """
        return self._char_nbits

    @property
    def block_nchars(self) -> int:
        """
        Number of characters in a block: `lcm(char_nbits, 8)//char_nbits`.
        Padded strings have length an integral multiple of this number.
        """
        return self._block_nchars

    @property
    def effective_base(self) -> int:
        """
        Number of distinct digits representable by one character: `2**char_nbits`.
        This is at least as large as the number of characters in the alphabet.
        """
        effective_base: int = 2**self.char_nbits
        return effective_base

    @property
    def padding(self) -> PaddingOptions:
        """
        The padding option: one of `"ignore"`, `"include"` or `"require"`.
        """
        return self._padding

    @property
    def include_padding(self) -> bool:
        """
        Whether padding is included in encoded strings (padding is `"include"` or `"require"`).
        """
        return self.padding in ("include", "require")

    @property
    def require_padding(self) -> bool:
        """
        Whether `strip_string` checks that the correct amount of padding is present
        (padding is `"require"`).
        """
        return self.padding == "require"

    @property
    def pad_char(self) -> Optional[str]:
        """
        An optional character to be used for padding of encoded strings.
        In [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html), this is `"="` for both base64 and base32,
        but it is `None` for base16 (where no padding of encoded strings is ever required).
        """
        return self._pad_char

    def pad(self, require: bool = False) -> "FixedCharBaseEncoding":
        """
        Calls `with_options` with padding set to `"require"` if `require` is true,
        and to `"include"` otherwise, and returns the result.
        """
        # `padding` is the option accepted by `__init__` and reported by `options`
        return self.with_options(padding="require" if require else "include")

    def nopad(self, allow: bool = True) -> "FixedCharBaseEncoding":
        """
        Calls `with_options` with padding set to `"ignore"` and returns the result.
        The current padding character is kept if `allow` is true, and set to `None` otherwise.
        """
        return self.with_options(padding="ignore", pad_char=self.pad_char if allow else None)

    def with_pad_char(self, pad_char: Optional[str]) -> "FixedCharBaseEncoding":
        """
        Calls `with_options` with the given padding character and returns the result.
        If `pad_char` is `None`, padding is also set to `"ignore"`, because the other
        padding options require a padding character.
        """
        options: Dict[str, Any] = dict(pad_char=pad_char)
        if pad_char is None:
            options["padding"] = "ignore"
        return self.with_options(**options)

    def pad_string(self, s: str) -> str:
        """
        If no padding character is specified for this encoding, returns the input string unchanged.
        If a padding character is specified for this encoding, pads the input string by appending the
        minimum number of padding characters necessary to make its length an integral multiple of the
        block char size (given by `FixedCharBaseEncoding.block_nchars`).

        Raises `TypeError` if `s` is not a string.
        """
        if not isinstance(s, str):
            raise TypeError(f"Expected str, found {type(s).__name__}.")
        pad_char = self.pad_char
        block_nchars = self._block_nchars
        # no padding available for this encoding scheme
        if pad_char is None:
            return s
        # padding available, but no need for padding
        if len(s)%block_nchars == 0:
            return s
        # compute required padding length
        pad_len = block_nchars-(len(s)%block_nchars)
        # return padded string
        return s+pad_char*pad_len

    def strip_string(self, s: str) -> str:
        """
        If no padding character is specified for this encoding, returns the input string unchanged.
        Otherwise, returns the input string with all trailing padding characters removed
        (lowercase and uppercase variants included, if the encoding is not case-sensitive).

        Raises `TypeError` if `s` is not a string.
        If padding is `"require"`, raises `PaddingError` unless the number of padding characters
        removed is exactly the number needed to complete the last block.
        """
        if not isinstance(s, str):
            raise TypeError(f"Expected str, found {type(s).__name__}.")
        pad_char = self.pad_char
        case_sensitive = self.case_sensitive
        block_nchars = self._block_nchars
        # no padding available for this encoding scheme
        if pad_char is None:
            return s
        # padding character(s) to strip from the right of the string
        pad_chars = pad_char
        if not case_sensitive:
            pad_chars += pad_char.lower()+pad_char.upper()
        # strip padding from string
        s_stripped = s.rstrip(pad_chars)
        # if padding is required on decoding, check the correct amount was included
        if self.require_padding:
            padding = len(s)-len(s_stripped)
            extra_nchars = len(s_stripped)%block_nchars
            expected_padding = 0 if extra_nchars == 0 else block_nchars-extra_nchars
            if padding != expected_padding:
                raise PaddingError(padding, expected_padding)
        return s_stripped

    def canonical_bytes(self, b: bytes) -> bytes:
        """
        Validates the bytes (using `_validate_bytes`) and returns them unchanged.
        """
        self._validate_bytes(b)
        return b

    def canonical_string(self, s: str) -> str:
        """
        Returns the string padded (using `pad_string`) if padding is included by this encoding,
        and stripped of padding (using `strip_string`) otherwise.
        """
        if self.include_padding:
            return self.pad_string(s)
        return self.strip_string(s)

    def _validate_string(self, s: str) -> str:
        # padding is removed before the string is validated by the parent class
        s = self.strip_string(s)
        return super()._validate_string(s)

    def _encode(self, b: bytes) -> str:
        """
        Encodes bytes into a string, padded if padding is included by this encoding.

        Raises `InvalidDigitError` if a digit has no corresponding character, which can
        only happen when the alphabet has fewer than `2**char_nbits` characters.
        """
        alphabet = self.alphabet
        base = self.base
        char_nbits = self.char_nbits
        effective_base = self.effective_base
        # bytes as unsigned integer
        i = int.from_bytes(b, byteorder="big")
        # add padding bits (align to integral number of characters)
        nchars, extra_nbits = divmod((8*len(b)), char_nbits)
        if extra_nbits > 0:
            i <<= char_nbits-extra_nbits # pad bits set to 0
            nchars += 1
        # compute characters in reverse order
        revchars: List[str] = []
        for _ in range(nchars):
            # extract next digit by consuming rightmost char_nbits
            # Same as: d = i % (2**char_nbits); i >>= char_nbits
            i, d = divmod(i, effective_base)
            # ensure digit is valid for actual base (number of characters in the alphabet)
            if not d < base:
                raise InvalidDigitError(d, base)
            # add the next character to the list
            revchars.append(alphabet[d])
        # join characters, pad string (if padding is to be included) and return
        s = "".join(reversed(revchars))
        if not self.include_padding:
            return s
        return self.pad_string(s)

    def _decode(self, s: str) -> bytes:
        """
        Decodes a string (already stripped of padding characters) into bytes.

        Raises `DecodingError` if the string length implies more pad bits than the
        bits in a single character, or if the pad bits are not all zero.
        """
        char_nbits = self.char_nbits
        alphabet_revdir = self.alphabet.revdir
        # decode string into unsigned integer: every character contributes exactly
        # char_nbits bits, mirroring the digit extraction modulo 2**char_nbits in _encode
        # (the alphabet size must not be used here, as it can be less than 2**char_nbits)
        i = 0
        for c in s:
            i = (i << char_nbits) + alphabet_revdir[c]
        # remove padding bits (ensure that there are not too many and that they are all set to zero)
        original_nbytes, extra_nbits = divmod((char_nbits*len(s)), 8)
        if extra_nbits >= char_nbits:
            raise DecodingError(f"More pad bits found ({extra_nbits}) than bits per character ({char_nbits}).")
        if extra_nbits > 0:
            i, pad_bits = divmod(i, 2**extra_nbits)
            if pad_bits != 0:
                raise DecodingError("Pad bits must be zero.")
        # convert unsigned integer into the required number of bytes (zero-padded to the left)
        return i.to_bytes(length=original_nbytes, byteorder="big")

    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        """
        Returns the options `char_nbits` (as passed to the constructor), `pad_char` and `padding`.
        If `skip_defaults` is true, options with default value (`"auto"`, `None` and `"ignore"`
        respectively) are omitted.
        """
        options: Dict[str, Any] = {}
        if not skip_defaults or self._init_char_nbits != "auto":
            options["char_nbits"] = self._init_char_nbits
        if not skip_defaults or self.pad_char is not None:
            options["pad_char"] = self.pad_char
        if not skip_defaults or self.padding != "ignore":
            options["padding"] = self.padding
        return options
Ancestors
- BaseEncoding
- abc.ABC
Instance variables
var block_nchars : int
-
Expand source code
@property
def block_nchars(self) -> int:
    """
    Number of characters in a block, as stored in `_block_nchars`.
    """
    nchars = self._block_nchars
    return nchars
var char_nbits : int
-
Expand source code
@property
def char_nbits(self) -> int:
    """
    Number of bits encoded by each character, as stored in `_char_nbits`.
    """
    nbits = self._char_nbits
    return nbits
var effective_base : int
-
Expand source code
@property
def effective_base(self) -> int:
    """
    Number of distinct digits representable by one character: 2 to the power of `char_nbits`.
    """
    return 2**self.char_nbits
var include_padding : bool
-
Expand source code
@property
def include_padding(self) -> bool:
    """
    Whether the padding option is `"include"` or `"require"`.
    """
    padded_modes = ("include", "require")
    return self.padding in padded_modes
var pad_char : Optional[str]
-
An optional character to be used for padding of encoded strings. In rfc4648, this is `"="` for both base64 and base32, but it is `None` for base16 (where no padding of encoded strings is ever required).
Expand source code
@property
def pad_char(self) -> Optional[str]:
    """
    The character used to pad encoded strings, or `None` if this encoding has no padding character.
    For example, [rfc4648](https://datatracker.ietf.org/doc/html/rfc4648.html) uses `"="` for
    base64 and base32, and no padding character for base16 (which never needs padding).
    """
    char = self._pad_char
    return char
var padding : Literal['ignore', 'include', 'require']
-
Expand source code
@property
def padding(self) -> PaddingOptions:
    """
    The padding option of this encoding, as stored in `_padding`.
    """
    option = self._padding
    return option
var require_padding : bool
-
Expand source code
@property
def require_padding(self) -> bool:
    """
    Whether the padding option is `"require"`.
    """
    option = self.padding
    return option == "require"
Methods
def canonical_bytes(self, b: bytes) ‑> bytes
-
Expand source code
def canonical_bytes(self, b: bytes) -> bytes:
    """
    Passes the bytes to `_validate_bytes` and returns them unchanged.
    """
    validate = self._validate_bytes
    validate(b)
    return b
def canonical_string(self, s: str) ‑> str
-
Expand source code
def canonical_string(self, s: str) -> str:
    """
    Returns `pad_string(s)` if padding is included by this encoding, and `strip_string(s)` otherwise.
    """
    normalise = self.pad_string if self.include_padding else self.strip_string
    return normalise(s)
def nopad(self, allow: bool = True) ‑> FixedCharBaseEncoding
-
Expand source code
def nopad(self, allow: bool = True) -> "FixedCharBaseEncoding":
    """
    Calls `with_options` with padding set to `"ignore"` and returns the result.
    The current padding character is kept if `allow` is true, and set to `None` otherwise.
    """
    # `padding` is the option accepted by the constructor and reported by `options`;
    # there are no `include_padding`/`require_padding` options
    return self.with_options(padding="ignore", pad_char=self.pad_char if allow else None)
def options(self, skip_defaults: bool = False) ‑> Mapping[str, Any]
-
Expand source code
def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
    """
    Returns a mapping with the options `char_nbits` (as stored in `_init_char_nbits`),
    `pad_char` and `padding`. If `skip_defaults` is true, options equal to their default
    value (`"auto"`, `None` and `"ignore"` respectively) are left out.
    """
    # (option name, current value, default value), in output order
    candidates = (
        ("char_nbits", self._init_char_nbits, "auto"),
        ("pad_char", self.pad_char, None),
        ("padding", self.padding, "ignore"),
    )
    return {
        name: value
        for name, value, default in candidates
        if not skip_defaults or value != default
    }
def pad(self, require: bool = False) ‑> FixedCharBaseEncoding
-
Expand source code
def pad(self, require: bool = False) -> "FixedCharBaseEncoding":
    """
    Calls `with_options` with padding set to `"require"` if `require` is true,
    and to `"include"` otherwise, and returns the result.
    """
    # `padding` is the option accepted by the constructor and reported by `options`;
    # there are no `include_padding`/`require_padding` options
    return self.with_options(padding="require" if require else "include")
def pad_string(self, s: str) ‑> str
-
If no padding character is specified for this encoding, returns the input string unchanged. If a padding character is specified for this encoding, pads the input string by appending the minimum number of padding characters necessary to make its length an integral multiple of the block char size (given by `FixedCharBaseEncoding.block_nchars`).
Expand source code
def pad_string(self, s: str) -> str:
    """
    Pads a string to a whole number of blocks.

    Returns the string unchanged if this encoding has no padding character. Otherwise, appends
    the fewest padding characters which make the length of the string an integral multiple of
    the number of characters in a block (`_block_nchars`).

    Raises `TypeError` if `s` is not a string.
    """
    if not isinstance(s, str):
        raise TypeError()
    filler = self.pad_char
    block_size = self._block_nchars
    if filler is None:
        # this encoding has no padding character
        return s
    # number of characters missing to complete the last block (0 if already complete)
    missing = -len(s) % block_size
    return s + filler*missing
def strip_string(self, s: str) ‑> str
-
Expand source code
def strip_string(self, s: str) -> str:
    """
    Removes trailing padding from a string.

    Returns the string unchanged if this encoding has no padding character. Otherwise, strips
    all trailing padding characters (also in lowercase and uppercase form, if the encoding is
    not case-sensitive) and returns the result.

    Raises `TypeError` if `s` is not a string. If padding is required, raises `PaddingError`
    unless the number of characters stripped is exactly the number needed to complete the
    last block.
    """
    if not isinstance(s, str):
        raise TypeError()
    filler = self.pad_char
    exact_case = self.case_sensitive
    block_size = self._block_nchars
    if filler is None:
        # this encoding has no padding character
        return s
    # set of characters accepted as padding at the end of the string
    strippable = filler if exact_case else filler+filler.lower()+filler.upper()
    stripped = s.rstrip(strippable)
    if not self.require_padding:
        return stripped
    # padding is required: the amount found must complete the last block exactly
    found = len(s)-len(stripped)
    expected = -len(stripped) % block_size
    if found != expected:
        raise PaddingError(found, expected)
    return stripped
def with_pad_char(self, pad_char: Optional[str]) ‑> FixedCharBaseEncoding
-
Expand source code
def with_pad_char(self, pad_char: Optional[str]) -> "FixedCharBaseEncoding":
    """
    Calls `with_options` with the given padding character and returns the result.
    If `pad_char` is `None`, padding is also set to `"ignore"`, because the other
    padding options require a padding character.
    """
    options: Dict[str, Any] = dict(pad_char=pad_char)
    if pad_char is None:
        # `padding` is the option accepted by the constructor and reported by `options`;
        # there are no `include_padding`/`require_padding` options
        options["padding"] = "ignore"
    return self.with_options(**options)
Inherited members