Module bases.encoding.block

Block base encodings.

Expand source code
"""
    Block base encodings.
"""

import math
from types import MappingProxyType
from typing import Any, Dict, List, Mapping, Optional, Union
from typing_validation import validate

from bases.alphabet import Alphabet
from .base import BaseEncoding
from .simple import SimpleBaseEncoding
from .zeropad import ZeropadBaseEncoding
from .errors import EncodingError, DecodingError, InvalidCharBlockError, InvalidByteBlockError

class BlockBaseEncoding(BaseEncoding):
    """
        Block base encodings. Split the bytestring to encode (resp. string to decode) into blocks,
        then encodes (resp. decodes) each block individually using an underlying encoding.
        By default, the underlying encoding is a `bases.encoding.simple.SimpleBaseEncoding`.

        Constructor options:

        - `block_size: Union[int, Mapping[int, int]]` cf. below
        - `sep_char: str = ""` an optional separator character for encoded string blocks (empty string if unspecified)
        - `reverse_blocks: bool = False` an optional flag to reverse individual char blocks in the encoded string

        The `block_size` option is mandatory and determines the allowed block sizes for encoding and decoding:

        - if `block_size` is a strictly increasing mapping of positive integers to positive integers, its keys are taken
          to be the allowed block byte sizes and its values are taken to be the corresponding block char sizes.
        - if `block_size` is an integer, all block byte sizes in `range(1, block_size+1)` are allowed, and the corresponding
          block char sizes are computed by:

          ```py
          char_size = int(math.floor(math.log(256**byte_size, base)))+1
          ```

        The property `BlockBaseEncoding.nbytes2nchars` has all valid block byte sizes as keys and the corresponding block char sizes as values.
        The property `BlockBaseEncoding.nchars2nbytes` has all valid block char sizes as keys and the corresponding block byte sizes as values.
        Each pair of corresponding block byte and char sizes is assessed to ensure that encoding and decoding are unambiguous,
        using the static methods `ZeropadBaseEncoding.max_block_nchars` and `ZeropadBaseEncoding.max_block_nbytes`.

        The maximum valid block byte (resp. char) size is used on encoding (resp. decoding) for all blocks except at most the last one:
        if the number of bytes (resp. chars) in the last block is not valid, the bytestring (resp. string) is not valid overall.

        As a concrete example, the following is the constructor for the [base45 encoding](https://datatracker.ietf.org/doc/draft-faltstrom-base45/):

        ```py
        base45 = BlockBaseEncoding(alphabet.base45, block_size={1: 2, 2: 3})
        ```

        In this case, encoding uses blocks of 2 bytes, with the final block allowed to be 1 or 2 bytes. Decoding uses blocks of 3 chars, with the
        final block allowed to be 2 or 3 chars (but not 1 char). Because no encoding was explicitly specified, the encoding used is the simple
        encoding for the base45 alphabet.

        Encoding of a bytestring `b`:

        1. split `b` into blocks of size `BlockBaseEncoding.block_nbytes`, with the final block allowed to be any size in `BlockBaseEncoding.nbytes2nchars`
           (raise `bases.encoding.errors.EncodingError` if it isn't)
        2. encode each block individually using the `BlockBaseEncoding.block_encoding`
        3. check that no encoded block string exceeds the block char size corresponding to the original block byte size
        4. prepend zero chars to each encoded block string until it reaches the designated block char size
        5. if `reverse_blocks`, reverse each individual char block
        6. join the blocks into the final encoded string (using the separator character `BlockBaseEncoding.sep_char`, if specified)

        Decoding of a string `s`:

        1. split `s` into blocks of size `BlockBaseEncoding.block_nchars`, with the final block allowed to be any size in `BlockBaseEncoding.nchars2nbytes`
           (raise `bases.encoding.errors.DecodingError` if it isn't)
        2. if `reverse_blocks`, reverse each individual char block
        3. decode each block individually using the `BlockBaseEncoding.block_encoding`
        4. check that no decoded block bytestring exceeds the block byte size corresponding to the original block char size
        5. prepend zero bytes to each decoded block bytestring until it reaches the designated block byte size
        6. join the blocks into the final decoded bytestring

    """
    # pylint: disable = too-many-instance-attributes

    _init_encoding: Union[str, range, Alphabet, BaseEncoding]
    _init_case_sensitive: Optional[bool]
    _init_block_size: Union[int, Mapping[int, int]]

    _block_encoding: BaseEncoding
    _nbytes2nchars: Mapping[int, int]
    _nchars2nbytes: Mapping[int, int]
    _block_nbytes: int
    _sep_char: str = ""
    _block_nchars: int
    _reverse_blocks: bool = False

    def __init__(self, encoding: Union[str, range, Alphabet, BaseEncoding], *,
                 case_sensitive: Optional[bool] = None,
                 block_size: Union[int, Mapping[int, int]],
                 sep_char: str = "",
                 reverse_blocks: bool = False):
        """
            Creates a block encoding.

            - `encoding`: either a `BaseEncoding`, used as-is to encode/decode individual blocks,
              or an alphabet specification, which is wrapped into a `SimpleBaseEncoding`
            - `case_sensitive`: passed on to the `BaseEncoding` constructor
            - `block_size`: an integer `n` (block byte sizes `1..n` allowed, char sizes computed from the base)
              or a mapping of allowed block byte sizes to the corresponding block char sizes
            - `sep_char`: separator character for encoded blocks (empty string for no separator)
            - `reverse_blocks`: whether individual char blocks are reversed in the encoded string

            Raises `ValueError` if `block_size` yields no block sizes at all, or if the options
            fail the checks performed by `__validate_init`.
        """
        validate(encoding, Union[str, range, Alphabet, BaseEncoding])
        validate(block_size, Union[int, Mapping[int, int]])
        validate(sep_char, str)
        validate(reverse_blocks, bool)
        # constructor arguments are kept verbatim: options(), with_options() and __repr__() read them back
        self._init_encoding = encoding
        self._init_case_sensitive = case_sensitive
        self._init_block_size = block_size
        if isinstance(encoding, BaseEncoding):
            alphabet: Union[str, range, Alphabet] = encoding.alphabet
        else:
            alphabet = encoding
            encoding = SimpleBaseEncoding(alphabet)
        super().__init__(alphabet, case_sensitive=case_sensitive)
        self._block_encoding = encoding
        self._sep_char = sep_char
        self._reverse_blocks = reverse_blocks
        if isinstance(block_size, int):
            base = self.base
            block_sizes: Mapping[int, int] = {
                i: int(math.floor(math.log(256**i, base)))+1
                for i in range(1, block_size+1)
            }
        else:
            block_sizes = block_size
        if not block_sizes:
            # without this check the max() calls below fail with an unhelpful "empty sequence" message
            raise ValueError("At least one block size must be specified: "
                             "block_size must be a positive integer or a non-empty mapping.")
        # byte size -> char size, ordered by increasing byte size
        self._nbytes2nchars = MappingProxyType({
            nbytes: block_sizes[nbytes]
            for nbytes in sorted(block_sizes)
        })
        # char size -> byte size, ordered by increasing char size
        _nchars2nbytes = {
            nchars: nbytes for nbytes, nchars in block_sizes.items()
        }
        self._nchars2nbytes = MappingProxyType({
            nchars: _nchars2nbytes[nchars]
            for nchars in sorted(_nchars2nbytes)
        })
        # the largest sizes are used for all blocks except (possibly) the last one
        self._block_nbytes = max(self.nbytes2nchars)
        self._block_nchars = max(self.nchars2nbytes)
        self.__validate_init()

    def __validate_init(self) -> None:
        """
            Checks the constructor options, raising `ValueError` if:

            - the separator is longer than one character
            - block char sizes do not strictly increase with block byte sizes
            - the largest block char/byte sizes exceed the maxima computed by
              `ZeropadBaseEncoding.max_block_nchars` and `ZeropadBaseEncoding.max_block_nbytes`
        """
        base = self.base
        sep_char = self.sep_char
        nbytes2nchars = self.nbytes2nchars
        if len(sep_char) not in (0, 1):
            raise ValueError("Separator character must be empty string or length 1 string.")
        # nbytes2nchars is ordered by increasing byte size, so each char size
        # must be strictly larger than the one immediately before it
        prev_nchars: Optional[int] = None
        for nchars in nbytes2nchars.values():
            if prev_nchars is not None and prev_nchars >= nchars:
                raise ValueError("Block char size must strictly increase with block byte size.")
            prev_nchars = nchars
        block_nbytes = self.block_nbytes
        block_nchars = self.block_nchars
        max_block_nbytes = ZeropadBaseEncoding.max_block_nbytes(base, block_nchars)
        max_block_nchars = ZeropadBaseEncoding.max_block_nchars(base, block_nbytes)
        if block_nchars > max_block_nchars:
            raise ValueError(f"Number of characters allowed in largest block is too large: "
                             f"the maximum for base = {base} and block_nbytes = {block_nbytes} is "
                             f"block_nchars = {max_block_nchars}")
        if block_nbytes > max_block_nbytes:
            raise ValueError(f"Number of bytes allowed in largest block is too large: "
                             f"the maximum for base = {base} and block_nchars {block_nchars} is "
                             f"block_nbytes = {max_block_nbytes}")

    @property
    def block_encoding(self) -> BaseEncoding:
        """
            The underlying encoding applied to each block individually.
        """
        return self._block_encoding

    @property
    def nbytes2nchars(self) -> Mapping[int, int]:
        """
            Allowed block byte sizes (keys) with the corresponding block char sizes (values).
        """
        return self._nbytes2nchars

    @property
    def nchars2nbytes(self) -> Mapping[int, int]:
        """
            Allowed block char sizes (keys) with the corresponding block byte sizes (values).
        """
        return self._nchars2nbytes

    @property
    def block_nbytes(self) -> int:
        """
            Largest allowed block byte size.
        """
        return self._block_nbytes

    @property
    def block_nchars(self) -> int:
        """
            Largest allowed block char size.
        """
        return self._block_nchars

    @property
    def sep_char(self) -> str:
        """
            Character separating blocks in encoded strings:
            a string of length 1, or the empty string when no separator is used.
        """
        return self._sep_char

    @property
    def reverse_blocks(self) -> bool:
        """
            Flag telling whether each char block is reversed on encoding, as is done
            e.g. by the [base45 spec](https://datatracker.ietf.org/doc/draft-faltstrom-base45/)
        """
        return self._reverse_blocks

    def canonical_bytes(self, b: bytes) -> bytes:
        """
            Validates the bytestring `b` and hands it back unchanged
            (the value returned by the validation is discarded).
        """
        self._validate_bytes(b)
        return b

    def canonical_string(self, s: str) -> str:
        """
            Validates the string `s` and hands it back unchanged
            (the value returned by the validation is discarded).
        """
        self._validate_string(s)
        return s

    def _validate_bytes(self, b: bytes) -> bytes:
        """
            Runs the parent class validation on `b`, then checks that the bytes left over after
            splitting into blocks of `block_nbytes` bytes (if any) form a block of allowed size.
            Raises `EncodingError` otherwise.
        """
        b = super()._validate_bytes(b)
        remainder = len(b) % self.block_nbytes
        if remainder == 0 or remainder in self.nbytes2nchars:
            return b
        raise EncodingError(f"Last block of {remainder} bytes not allowed.")

    def _validate_string(self, s: str) -> str:
        """
            Validates the string `s` for decoding and returns it with block separators removed.

            If a separator character is set, every full-size block must be followed by it
            (the final, possibly shorter, block is not). The separator-free string is then
            passed to the parent class validation, and the chars left over after splitting
            into blocks of `block_nchars` chars (if any) must form a block of allowed size.

            Raises `DecodingError` if a separator is missing or the last block size is not allowed.
        """
        validate(s, str)
        sep_char = self.sep_char
        block_nchars = self.block_nchars
        if sep_char:
            char_blocks: List[str] = []
            # with separators, each full block occupies block_nchars+1 characters
            for idx in range(0, len(s), block_nchars+1):
                char_block = s[idx:idx+block_nchars+1]
                if len(char_block) == block_nchars+1:
                    # intermediate block, must terminate with separator
                    if char_block[-1] != sep_char:
                        raise DecodingError(f"Missing separator at end of block #{idx}")
                    char_blocks.append(char_block[:-1])
                else:
                    # final block
                    char_blocks.append(char_block)
            s = "".join(char_blocks)
        s = super()._validate_string(s)
        last_block_nchars = len(s)%self.block_nchars
        if last_block_nchars > 0 and last_block_nchars not in self.nchars2nbytes:
            # this is a failure of the string being decoded, hence a decoding error
            raise DecodingError(f"Last block of {last_block_nchars} chars not allowed.")
        return s

    def _encode(self, b: bytes) -> str:
        """
            Encodes the bytestring `b` block by block:

            1. `b` is split into blocks of `block_nbytes` bytes (the last one may be shorter)
            2. each block is stripped of leading zero bytes and encoded with the block encoding
            3. the result is left-padded with the zero char to the char size designated for the block
            4. the char block is reversed, if `reverse_blocks` is set
            5. the char blocks are joined using `sep_char` (the empty string if no separator is set)

            Raises `InvalidByteBlockError` if an encoded block exceeds its designated char size.
        """
        zero_char = self.zero_char
        block_nbytes = self.block_nbytes
        nbytes2nchars = self.nbytes2nchars
        reverse_blocks = self.reverse_blocks
        # convert byte blocks into char blocks (all but last are block_nbytes long)
        char_blocks: List[str] = []
        for idx in range(0, len(b), block_nbytes):
            # extract next byte block
            byte_block = b[idx:idx+block_nbytes]
            # encoding of byte block, without leading zero bytes (re-introduced below as zero chars)
            s = self._block_encoding.encode(byte_block.lstrip(b"\x00"))
            # number of chars in corresponding char block
            block_nchars = nbytes2nchars[len(byte_block)]
            if len(s) > block_nchars:
                raise InvalidByteBlockError(f"Encoded value too large. Block bytes: {list(byte_block)}, encoded chars: {repr(s)} "
                                            f"(expected num of encoded chars: {block_nchars}).")
            # pad char block to required number of characters and add to list
            char_block = zero_char*(block_nchars-len(s))+s
            if reverse_blocks:
                char_block = char_block[::-1]
            char_blocks.append(char_block)
        # join character blocks to form encoded string, separated by sep_char:
        # this is the layout that _validate_string expects when decoding
        return self.sep_char.join(char_blocks)

    def _decode(self, s: str) -> bytes:
        """
            Decodes the string `s` block by block:

            1. `s` is split into blocks of `block_nchars` chars (the last one may be shorter);
               no separators are skipped here, `_validate_string` returns the string without them
            2. the char block is reversed, if `reverse_blocks` is set
            3. each block is stripped of leading zero chars and decoded with the block encoding
            4. the result is left-padded with zero bytes to the byte size designated for the block
            5. the byte blocks are joined into the decoded bytestring

            Raises `InvalidCharBlockError` if a decoded block exceeds its designated byte size.
        """
        zero_char = self.zero_char
        block_nchars = self.block_nchars
        nchars2nbytes = self.nchars2nbytes
        reverse_blocks = self.reverse_blocks
        # convert char blocks into byte blocks (all but last are block_nchars long)
        byte_blocks: List[bytes] = []
        for idx in range(0, len(s), block_nchars):
            # extract next char block
            char_block = s[idx:idx+block_nchars]
            if reverse_blocks:
                # undo the per-block reversal applied on encoding
                char_block = char_block[::-1]
            # decoding of char block, without leading zero chars (re-introduced below as zero bytes)
            b = self._block_encoding.decode(char_block.lstrip(zero_char))
            # number of bytes in corresponding byte block
            block_nbytes = nchars2nbytes[len(char_block)]
            if len(b) > block_nbytes:
                raise InvalidCharBlockError(f"Decoded value too large. Block chars: {repr(char_block)}, decoded bytes: {list(b)} "
                                            f"(expected num of decoded bytes: {block_nbytes}).")
            # pad byte block to required number of bytes and add to list
            byte_blocks.append(b"\x00"*(block_nbytes-len(b))+b)
        # join byte blocks to form decoded bytestring
        return b"".join(byte_blocks)

    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        """
            Returns the constructor options of this encoding as a mapping.
            The `block_size` option is always included, with the value originally passed to the constructor;
            `sep_char` and `reverse_blocks` are left out when `skip_defaults` is true and they have their default value.
        """
        validate(skip_defaults, bool)
        include_defaults = not skip_defaults
        opts: Dict[str, Any] = {"block_size": self._init_block_size}
        sep_char = self.sep_char
        if include_defaults or sep_char != "":
            opts["sep_char"] = sep_char
        reverse_blocks = self.reverse_blocks
        if include_defaults or reverse_blocks is not False:
            opts["reverse_blocks"] = reverse_blocks
        return opts

    def with_options(self, **options: Any) -> "BlockBaseEncoding":
        """
            Returns a new encoding of the same type, with the current options overridden by those given.
            Raises `KeyError` on the first option name which is not among the current options.
        """
        merged = dict(self.options())
        unknown = next((name for name in options if name not in merged), None)
        if unknown is not None:
            raise KeyError(f"Unknown option {repr(unknown)} for {type(self).__name__}")
        merged.update(options)
        cls = type(self)
        init_encoding = self._init_encoding
        if not isinstance(init_encoding, BaseEncoding):
            # originally built from an alphabet: rebuild from the current alphabet
            return cls(self.alphabet, **merged)
        return cls(init_encoding, case_sensitive=self._init_case_sensitive, **merged)

    def __eq__(self, other: Any) -> bool:
        """
            Equality starts from the parent class comparison: a `False` or `NotImplemented` result is returned as-is.
            Otherwise, `other` must be a `BlockBaseEncoding` and, if this encoding was built from another encoding,
            the initial encodings and the case sensitivity must agree as well.
        """
        super_eq = super().__eq__(other)
        if super_eq in (False, NotImplemented):
            return super_eq
        if isinstance(other, BlockBaseEncoding):
            own_encoding = self._init_encoding
            if not isinstance(own_encoding, BaseEncoding):
                return True
            return own_encoding == other._init_encoding and self.case_sensitive == other.case_sensitive
        return NotImplemented

    def __hash__(self) -> int:
        """
            Hash computed from the type, the alphabet, the block encoding and the options.
            Equal options always yield equal hashes.
        """
        # block_size may be a mapping (e.g. a dict), which is not hashable:
        # such values are frozen into a sorted tuple of (key, value) pairs first
        hashable_options = tuple(
            (name, tuple(sorted(value.items())) if isinstance(value, Mapping) else value)
            for name, value in self.options().items()
        )
        return hash((type(self), self.alphabet, self.block_encoding, hashable_options))

    def __repr__(self) -> str:
        """
            Constructor-like representation: the initial encoding (with case sensitivity) or the alphabet,
            followed by the options which do not have their default value.
        """
        cls_name = type(self).__name__
        init_encoding = self._init_encoding
        if isinstance(init_encoding, BaseEncoding):
            head = f"{init_encoding}, case_sensitive={self._init_case_sensitive}"
        else:
            head = repr(self.alphabet)
        args = [head]
        args.extend(f"{name}={value!r}" for name, value in self.options(skip_defaults=True).items())
        return f"{cls_name}({', '.join(args)})"

Classes

class BlockBaseEncoding (encoding: Union[str, range, Alphabet, BaseEncoding], *, case_sensitive: Optional[bool] = None, block_size: Union[int, Mapping[int, int]], sep_char: str = '', reverse_blocks: bool = False)

Block base encodings. Split the bytestring to encode (resp. string to decode) into blocks, then encodes (resp. decodes) each block individually using an underlying encoding. By default, the underlying encoding is a SimpleBaseEncoding.

Constructor options:

  • block_size: Union[int, Mapping[int, int]] cf. below
  • sep_char: str = "" an optional separator character for encoded string blocks (empty string if unspecified)
  • reverse_blocks: bool = False an optional flag to reverse individual char blocks in the encoded string

The block_size option is mandatory and determines the allowed block sizes for encoding and decoding:

  • if block_size is a strictly increasing mapping of positive integers to positive integers, its keys are taken to be the allowed block byte sizes and its values are taken to be the corresponding block char sizes.
  • if block_size is an integer, all block byte sizes in range(1, block_size+1) are allowed, and the corresponding block char sizes are computed by:

char_size = int(math.floor(math.log(256**byte_size, base)))+1

The property BlockBaseEncoding.nbytes2nchars has all valid block byte sizes as keys and the corresponding block char sizes as values. The property BlockBaseEncoding.nchars2nbytes has all valid block char sizes as keys and the corresponding block byte sizes as values. Each pair of corresponding block byte and char sizes is assessed to ensure that encoding and decoding are unambiguous, using the static methods ZeropadBaseEncoding.max_block_nchars and ZeropadBaseEncoding.max_block_nbytes.

The maximum valid block byte (resp. char) size is used on encoding (resp. decoding) for all blocks except at most the last one: if the number of bytes (resp. chars) in the last block is not valid, the bytestring (resp. string) is not valid overall.

As a concrete example, the following is the constructor for the base45 encoding:

base45 = BlockBaseEncoding(alphabet.base45, block_size={1: 2, 2: 3})

In this case, encoding uses blocks of 2 bytes, with the final block allowed to be 1 or 2 bytes. Decoding uses blocks of 3 chars, with the final block allowed to be 2 or 3 chars (but not 1 char). Because no encoding was explicitly specified, the encoding used is the simple encoding for the base45 alphabet.

Encoding of a bytestring b:

  1. split b into blocks of size BlockBaseEncoding.block_nbytes, with the final block allowed to be any size in BlockBaseEncoding.nbytes2nchars (raise EncodingError if it isn't)
  2. encode each block individually using the BlockBaseEncoding.block_encoding
  3. check that no encoded block string exceeds the block char size corresponding to the original block byte size
  4. prepend zero chars to each encoded block string until it reaches the designated block char size
  5. if reverse_blocks, reverse each individual char block
  6. join the blocks into the final encoded string (using the separator character BlockBaseEncoding.sep_char, if specified)

Decoding of a string s:

  1. split s into blocks of size BlockBaseEncoding.block_nchars, with the final block allowed to be any size in BlockBaseEncoding.nchars2nbytes (raise DecodingError if it isn't)
  2. if reverse_blocks, reverse each individual char block
  3. decode each block individually using the BlockBaseEncoding.block_encoding
  4. check that no decoded block bytestring exceeds the block byte size corresponding to the original block char size
  5. prepend zero bytes to each decoded block bytestring until it reaches the designated block byte size
  6. join the blocks into the final decoded bytestring
Expand source code
class BlockBaseEncoding(BaseEncoding):
    """
        Block base encodings. Split the bytestring to encode (resp. string to decode) into blocks,
        then encodes (resp. decodes) each block individually using an underlying encoding.
        By default, the underlying encoding is a `bases.encoding.simple.SimpleBaseEncoding`.

        Constructor options:

        - `block_size: Union[int, Mapping[int, int]]` cf. below
        - `sep_char: str = ""` an optional separator character for encoded string blocks (empty string if unspecified)
        - `reverse_blocks: bool = False` an optional flag to reverse individual char blocks in the encoded string

        The `block_size` option is mandatory and determines the allowed block sizes for encoding and decoding:

        - if `block_size` is a strictly increasing mapping of positive integers to positive integers, its keys are taken
          to be the allowed block byte sizes and its values are taken to be the corresponding block char sizes.
        - if `block_size` is an integer, all block byte sizes in `range(1, block_size+1)` are allowed, and the corresponding
          block char sizes are computed by:

          ```py
          char_size = int(math.floor(math.log(256**byte_size, base)))+1
          ```

        The property `BlockBaseEncoding.nbytes2nchars` has all valid block byte sizes as keys and the corresponding block char sizes as values.
        The property `BlockBaseEncoding.nchars2nbytes` has all valid block char sizes as keys and the corresponding block byte sizes as values.
        Each pair of corresponding block byte and char sizes is assessed to ensure that encoding and decoding are unambiguous,
        using the static methods `ZeropadBaseEncoding.max_block_nchars` and `ZeropadBaseEncoding.max_block_nbytes`.

        The maximum valid block byte (resp. char) size is used on encoding (resp. decoding) for all blocks except at most the last one:
        if the number of bytes (resp. chars) in the last block is not valid, the bytestring (resp. string) is not valid overall.

        As a concrete example, the following is the constructor for the [base45 encoding](https://datatracker.ietf.org/doc/draft-faltstrom-base45/):

        ```py
        base45 = BlockBaseEncoding(alphabet.base45, block_size={1: 2, 2: 3})
        ```

        In this case, encoding uses blocks of 2 bytes, with the final block allowed to be 1 or 2 bytes. Decoding uses blocks of 3 chars, with the
        final block allowed to be 2 or 3 chars (but not 1 char). Because no encoding was explicitly specified, the encoding used is the simple
        encoding for the base45 alphabet.

        Encoding of a bytestring `b`:

        1. split `b` into blocks of size `BlockBaseEncoding.block_nbytes`, with the final block allowed to be any size in `BlockBaseEncoding.nbytes2nchars`
           (raise `bases.encoding.errors.EncodingError` if it isn't)
        2. encode each block individually using the `BlockBaseEncoding.block_encoding`
        3. check that no encoded block string exceeds the block char size corresponding to the original block byte size
        4. prepend zero chars to each encoded block string until it reaches the designated block char size
        5. if `reverse_blocks`, reverse each individual char block
        6. join the blocks into the final encoded string (using the separator character `BlockBaseEncoding.sep_char`, if specified)

        Decoding of a string `s`:

        1. split `s` into blocks of size `BlockBaseEncoding.block_nchars`, with the final block allowed to be any size in `BlockBaseEncoding.nchars2nbytes`
           (raise `bases.encoding.errors.DecodingError` if it isn't)
        2. if `reverse_blocks`, reverse each individual char block
        3. decode each block individually using the `BlockBaseEncoding.block_encoding`
        4. check that no decoded block bytestring exceeds the block byte size corresponding to the original block char size
        5. prepend zero bytes to each decoded block bytestring until it reaches the designated block byte size
        6. join the blocks into the final decoded bytestring

    """
    # pylint: disable = too-many-instance-attributes

    _init_encoding: Union[str, range, Alphabet, BaseEncoding]
    _init_case_sensitive: Optional[bool]
    _init_block_size: Union[int, Mapping[int, int]]

    _block_encoding: BaseEncoding
    _nbytes2nchars: Mapping[int, int]
    _nchars2nbytes: Mapping[int, int]
    _block_nbytes: int
    _sep_char: str = ""
    _block_nchars: int
    _reverse_blocks: bool = False

    def __init__(self, encoding: Union[str, range, Alphabet, BaseEncoding], *,
                 case_sensitive: Optional[bool] = None,
                 block_size: Union[int, Mapping[int, int]],
                 sep_char: str = "",
                 reverse_blocks: bool = False):
        """
            Creates a block encoding.

            - `encoding`: either a `BaseEncoding`, used as-is to encode/decode individual blocks,
              or an alphabet specification, which is wrapped into a `SimpleBaseEncoding`
            - `case_sensitive`: passed on to the `BaseEncoding` constructor
            - `block_size`: an integer `n` (block byte sizes `1..n` allowed, char sizes computed from the base)
              or a mapping of allowed block byte sizes to the corresponding block char sizes
            - `sep_char`: separator character for encoded blocks (empty string for no separator)
            - `reverse_blocks`: whether individual char blocks are reversed in the encoded string

            Raises `ValueError` if `block_size` yields no block sizes at all, or if the options
            fail the checks performed by `__validate_init`.
        """
        validate(encoding, Union[str, range, Alphabet, BaseEncoding])
        validate(block_size, Union[int, Mapping[int, int]])
        validate(sep_char, str)
        validate(reverse_blocks, bool)
        # constructor arguments are kept verbatim: options(), with_options() and __repr__() read them back
        self._init_encoding = encoding
        self._init_case_sensitive = case_sensitive
        self._init_block_size = block_size
        if isinstance(encoding, BaseEncoding):
            alphabet: Union[str, range, Alphabet] = encoding.alphabet
        else:
            alphabet = encoding
            encoding = SimpleBaseEncoding(alphabet)
        super().__init__(alphabet, case_sensitive=case_sensitive)
        self._block_encoding = encoding
        self._sep_char = sep_char
        self._reverse_blocks = reverse_blocks
        if isinstance(block_size, int):
            base = self.base
            block_sizes: Mapping[int, int] = {
                i: int(math.floor(math.log(256**i, base)))+1
                for i in range(1, block_size+1)
            }
        else:
            block_sizes = block_size
        if not block_sizes:
            # without this check the max() calls below fail with an unhelpful "empty sequence" message
            raise ValueError("At least one block size must be specified: "
                             "block_size must be a positive integer or a non-empty mapping.")
        # byte size -> char size, ordered by increasing byte size
        self._nbytes2nchars = MappingProxyType({
            nbytes: block_sizes[nbytes]
            for nbytes in sorted(block_sizes)
        })
        # char size -> byte size, ordered by increasing char size
        _nchars2nbytes = {
            nchars: nbytes for nbytes, nchars in block_sizes.items()
        }
        self._nchars2nbytes = MappingProxyType({
            nchars: _nchars2nbytes[nchars]
            for nchars in sorted(_nchars2nbytes)
        })
        # the largest sizes are used for all blocks except (possibly) the last one
        self._block_nbytes = max(self.nbytes2nchars)
        self._block_nchars = max(self.nchars2nbytes)
        self.__validate_init()

    def __validate_init(self) -> None:
        """
            Checks the constructor options, raising `ValueError` if:

            - the separator is longer than one character
            - block char sizes do not strictly increase with block byte sizes
            - the largest block char/byte sizes exceed the maxima computed by
              `ZeropadBaseEncoding.max_block_nchars` and `ZeropadBaseEncoding.max_block_nbytes`
        """
        base = self.base
        sep_char = self.sep_char
        nbytes2nchars = self.nbytes2nchars
        if len(sep_char) not in (0, 1):
            raise ValueError("Separator character must be empty string or length 1 string.")
        # nbytes2nchars is ordered by increasing byte size, so each char size
        # must be strictly larger than the one immediately before it
        prev_nchars: Optional[int] = None
        for nchars in nbytes2nchars.values():
            if prev_nchars is not None and prev_nchars >= nchars:
                raise ValueError("Block char size must strictly increase with block byte size.")
            prev_nchars = nchars
        block_nbytes = self.block_nbytes
        block_nchars = self.block_nchars
        max_block_nbytes = ZeropadBaseEncoding.max_block_nbytes(base, block_nchars)
        max_block_nchars = ZeropadBaseEncoding.max_block_nchars(base, block_nbytes)
        if block_nchars > max_block_nchars:
            raise ValueError(f"Number of characters allowed in largest block is too large: "
                             f"the maximum for base = {base} and block_nbytes = {block_nbytes} is "
                             f"block_nchars = {max_block_nchars}")
        if block_nbytes > max_block_nbytes:
            raise ValueError(f"Number of bytes allowed in largest block is too large: "
                             f"the maximum for base = {base} and block_nchars {block_nchars} is "
                             f"block_nbytes = {max_block_nbytes}")

    @property
    def block_encoding(self) -> BaseEncoding:
        """
            The underlying encoding applied to each block individually.
        """
        return self._block_encoding

    @property
    def nbytes2nchars(self) -> Mapping[int, int]:
        """
            Allowed block byte sizes (keys) with the corresponding block char sizes (values).
        """
        return self._nbytes2nchars

    @property
    def nchars2nbytes(self) -> Mapping[int, int]:
        """
            Allowed block char sizes (keys) with the corresponding block byte sizes (values).
        """
        return self._nchars2nbytes

    @property
    def block_nbytes(self) -> int:
        """
            Largest allowed block byte size.
        """
        return self._block_nbytes

    @property
    def block_nchars(self) -> int:
        """
            Largest allowed block char size.
        """
        return self._block_nchars

    @property
    def sep_char(self) -> str:
        """
            Character separating blocks in encoded strings:
            a string of length 1, or the empty string when no separator is used.
        """
        return self._sep_char

    @property
    def reverse_blocks(self) -> bool:
        """
            Flag telling whether each char block is reversed on encoding, as is done
            e.g. by the [base45 spec](https://datatracker.ietf.org/doc/draft-faltstrom-base45/)
        """
        return self._reverse_blocks

    def canonical_bytes(self, b: bytes) -> bytes:
        """
            Validates the bytestring `b` and hands it back unchanged
            (the value returned by the validation is discarded).
        """
        self._validate_bytes(b)
        return b

    def canonical_string(self, s: str) -> str:
        """
            Validates the string `s` and hands it back unchanged
            (the value returned by the validation is discarded).
        """
        self._validate_string(s)
        return s

    def _validate_bytes(self, b: bytes) -> bytes:
        """
            Runs the parent class validation on `b`, then checks that the bytes left over after
            splitting into blocks of `block_nbytes` bytes (if any) form a block of allowed size.
            Raises `EncodingError` otherwise.
        """
        b = super()._validate_bytes(b)
        remainder = len(b) % self.block_nbytes
        if remainder == 0 or remainder in self.nbytes2nchars:
            return b
        raise EncodingError(f"Last block of {remainder} bytes not allowed.")

    def _validate_string(self, s: str) -> str:
        """
            Validates the string `s` for decoding and returns it with block separators removed.

            If a separator character is set, every full-size block must be followed by it
            (the final, possibly shorter, block is not). The separator-free string is then
            passed to the parent class validation, and the chars left over after splitting
            into blocks of `block_nchars` chars (if any) must form a block of allowed size.

            Raises `DecodingError` if a separator is missing or the last block size is not allowed.
        """
        validate(s, str)
        sep_char = self.sep_char
        block_nchars = self.block_nchars
        if sep_char:
            char_blocks: List[str] = []
            # with separators, each full block occupies block_nchars+1 characters
            for idx in range(0, len(s), block_nchars+1):
                char_block = s[idx:idx+block_nchars+1]
                if len(char_block) == block_nchars+1:
                    # intermediate block, must terminate with separator
                    if char_block[-1] != sep_char:
                        raise DecodingError(f"Missing separator at end of block #{idx}")
                    char_blocks.append(char_block[:-1])
                else:
                    # final block
                    char_blocks.append(char_block)
            s = "".join(char_blocks)
        s = super()._validate_string(s)
        last_block_nchars = len(s)%self.block_nchars
        if last_block_nchars > 0 and last_block_nchars not in self.nchars2nbytes:
            # this is a failure of the string being decoded, hence a decoding error
            raise DecodingError(f"Last block of {last_block_nchars} chars not allowed.")
        return s

    def _encode(self, b: bytes) -> str:
        """
            Encodes the bytestring `b` block by block:

            1. `b` is split into blocks of `block_nbytes` bytes (the last one may be shorter)
            2. each block is stripped of leading zero bytes and encoded with the block encoding
            3. the result is left-padded with the zero char to the char size designated for the block
            4. the char block is reversed, if `reverse_blocks` is set
            5. the char blocks are joined using `sep_char` (the empty string if no separator is set)

            Raises `InvalidByteBlockError` if an encoded block exceeds its designated char size.
        """
        zero_char = self.zero_char
        block_nbytes = self.block_nbytes
        nbytes2nchars = self.nbytes2nchars
        reverse_blocks = self.reverse_blocks
        # convert byte blocks into char blocks (all but last are block_nbytes long)
        char_blocks: List[str] = []
        for idx in range(0, len(b), block_nbytes):
            # extract next byte block
            byte_block = b[idx:idx+block_nbytes]
            # encoding of byte block, without leading zero bytes (re-introduced below as zero chars)
            s = self._block_encoding.encode(byte_block.lstrip(b"\x00"))
            # number of chars in corresponding char block
            block_nchars = nbytes2nchars[len(byte_block)]
            if len(s) > block_nchars:
                raise InvalidByteBlockError(f"Encoded value too large. Block bytes: {list(byte_block)}, encoded chars: {repr(s)} "
                                            f"(expected num of encoded chars: {block_nchars}).")
            # pad char block to required number of characters and add to list
            char_block = zero_char*(block_nchars-len(s))+s
            if reverse_blocks:
                char_block = char_block[::-1]
            char_blocks.append(char_block)
        # join character blocks to form encoded string, separated by sep_char:
        # this is the layout that _validate_string expects when decoding
        return self.sep_char.join(char_blocks)

    def _decode(self, s: str) -> bytes:
        """
            Decodes the string `s` block by block:

            1. `s` is split into blocks of `block_nchars` chars (the last one may be shorter);
               no separators are skipped here, `_validate_string` returns the string without them
            2. the char block is reversed, if `reverse_blocks` is set
            3. each block is stripped of leading zero chars and decoded with the block encoding
            4. the result is left-padded with zero bytes to the byte size designated for the block
            5. the byte blocks are joined into the decoded bytestring

            Raises `InvalidCharBlockError` if a decoded block exceeds its designated byte size.
        """
        zero_char = self.zero_char
        block_nchars = self.block_nchars
        nchars2nbytes = self.nchars2nbytes
        reverse_blocks = self.reverse_blocks
        # convert char blocks into byte blocks (all but last are block_nchars long)
        byte_blocks: List[bytes] = []
        for idx in range(0, len(s), block_nchars):
            # extract next char block
            char_block = s[idx:idx+block_nchars]
            if reverse_blocks:
                # undo the per-block reversal applied on encoding
                char_block = char_block[::-1]
            # decoding of char block, without leading zero chars (re-introduced below as zero bytes)
            b = self._block_encoding.decode(char_block.lstrip(zero_char))
            # number of bytes in corresponding byte block
            block_nbytes = nchars2nbytes[len(char_block)]
            if len(b) > block_nbytes:
                raise InvalidCharBlockError(f"Decoded value too large. Block chars: {repr(char_block)}, decoded bytes: {list(b)} "
                                            f"(expected num of decoded bytes: {block_nbytes}).")
            # pad byte block to required number of bytes and add to list
            byte_blocks.append(b"\x00"*(block_nbytes-len(b))+b)
        # join byte blocks to form decoded bytestring
        return b"".join(byte_blocks)

    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        """
            Returns the constructor options of this encoding as a mapping.
            The `block_size` option is always included, with the value originally passed to the constructor;
            `sep_char` and `reverse_blocks` are left out when `skip_defaults` is true and they have their default value.
        """
        validate(skip_defaults, bool)
        include_defaults = not skip_defaults
        opts: Dict[str, Any] = {"block_size": self._init_block_size}
        sep_char = self.sep_char
        if include_defaults or sep_char != "":
            opts["sep_char"] = sep_char
        reverse_blocks = self.reverse_blocks
        if include_defaults or reverse_blocks is not False:
            opts["reverse_blocks"] = reverse_blocks
        return opts

    def with_options(self, **options: Any) -> "BlockBaseEncoding":
        """
            Returns a new encoding of the same type, with the current options overridden by those given.
            Raises `KeyError` on the first option name which is not among the current options.
        """
        merged = dict(self.options())
        unknown = next((name for name in options if name not in merged), None)
        if unknown is not None:
            raise KeyError(f"Unknown option {repr(unknown)} for {type(self).__name__}")
        merged.update(options)
        cls = type(self)
        init_encoding = self._init_encoding
        if not isinstance(init_encoding, BaseEncoding):
            # originally built from an alphabet: rebuild from the current alphabet
            return cls(self.alphabet, **merged)
        return cls(init_encoding, case_sensitive=self._init_case_sensitive, **merged)

    def __eq__(self, other: Any) -> bool:
        """
            Equality starts from the parent class comparison: a `False` or `NotImplemented` result is returned as-is.
            Otherwise, `other` must be a `BlockBaseEncoding` and, if this encoding was built from another encoding,
            the initial encodings and the case sensitivity must agree as well.
        """
        super_eq = super().__eq__(other)
        if super_eq in (False, NotImplemented):
            return super_eq
        if isinstance(other, BlockBaseEncoding):
            own_encoding = self._init_encoding
            if not isinstance(own_encoding, BaseEncoding):
                return True
            return own_encoding == other._init_encoding and self.case_sensitive == other.case_sensitive
        return NotImplemented

    def __hash__(self) -> int:
        """
            Hash computed from the type, the alphabet, the block encoding and the options.
            Equal options always yield equal hashes.
        """
        # block_size may be a mapping (e.g. a dict), which is not hashable:
        # such values are frozen into a sorted tuple of (key, value) pairs first
        hashable_options = tuple(
            (name, tuple(sorted(value.items())) if isinstance(value, Mapping) else value)
            for name, value in self.options().items()
        )
        return hash((type(self), self.alphabet, self.block_encoding, hashable_options))

    def __repr__(self) -> str:
        """
            Constructor-like representation: the initial encoding (with case sensitivity) or the alphabet,
            followed by the options which do not have their default value.
        """
        cls_name = type(self).__name__
        init_encoding = self._init_encoding
        if isinstance(init_encoding, BaseEncoding):
            head = f"{init_encoding}, case_sensitive={self._init_case_sensitive}"
        else:
            head = repr(self.alphabet)
        args = [head]
        args.extend(f"{name}={value!r}" for name, value in self.options(skip_defaults=True).items())
        return f"{cls_name}({', '.join(args)})"

Ancestors

Instance variables

var block_encoding : BaseEncoding

The encoding used for individual blocks.

Expand source code
@property
def block_encoding(self) -> BaseEncoding:
    """
        The underlying encoding applied to each block individually.
    """
    return self._block_encoding
var block_nbytes : int

Number of bytes in the largest blocks.

Expand source code
@property
def block_nbytes(self) -> int:
    """
        Largest allowed block byte size.
    """
    return self._block_nbytes
var block_nchars : int

Number of characters in the largest blocks.

Expand source code
@property
def block_nchars(self) -> int:
    """
        Largest allowed block char size.
    """
    return self._block_nchars
var nbytes2nchars : Mapping[int, int]

Mapping of bytes block sizes to char block sizes.

Expand source code
@property
def nbytes2nchars(self) -> Mapping[int, int]:
    """
        Allowed block byte sizes (keys) with the corresponding block char sizes (values).
    """
    return self._nbytes2nchars
var nchars2nbytes : Mapping[int, int]

Mapping of char block sizes to byte block sizes.

Expand source code
@property
def nchars2nbytes(self) -> Mapping[int, int]:
    """
        Allowed block char sizes (keys) with the corresponding block byte sizes (values).
    """
    return self._nchars2nbytes
var reverse_blocks : bool

Whether individual char block should be reversed when encoding, e.g. as done by the base45 spec

Expand source code
@property
def reverse_blocks(self) -> bool:
    """
        Flag telling whether each char block is reversed on encoding, as is done
        e.g. by the [base45 spec](https://datatracker.ietf.org/doc/draft-faltstrom-base45/)
    """
    return self._reverse_blocks
var sep_char : str

Optional block separation character. It is either the empty string, or a string of length 1.

Expand source code
@property
def sep_char(self) -> str:
    """
        Character separating blocks in encoded strings:
        a string of length 1, or the empty string when no separator is used.
    """
    return self._sep_char

Inherited members