Module bases.encoding.zeropad

Zero-padded base encodings.

Expand source code
"""
    Zero-padded base encodings.
"""

import math
from typing import Any, Dict, Mapping, Optional, Union
from typing_validation import validate

from bases.alphabet import Alphabet
from .base import BaseEncoding
from .simple import SimpleBaseEncoding

class ZeropadBaseEncoding(BaseEncoding):
    """
        Zero-padded base encodings. Similar to `bases.encoding.simple.SimpleBaseEncoding`, but additionally:

        - preserves leading zeros
        - optionally enforces a fixed block size for encoded strings and decoded bytestrings

        Constructor options:

        - `block_nbytes: int = 1` number of bytes in a block (for decoded bytestrings)
        - `block_nchars: int = 1` number of chars in a block (for encoded strings)

        The static method `ZeropadBaseEncoding.max_block_nchars` (resp. `ZeropadBaseEncoding.max_block_nbytes`) gives the
        maximum block size in chars (resp. bytes) that can be used for a given block size in bytes (resp. chars).
        This is to ensure that encoding/decoding can always be performed unambiguously.

        Encoding of a bytestring `b`:

        1. count the number Z of leading zero byte blocks in `b` and strip them (default: count zero bytes and strip them)
        2. encode `b` as `bases.encoding.simple.SimpleBaseEncoding` would
        3. prepend the minimum number of zero chars necessary to make the encoded string length an integral multiple of `block_nchars`
        4. prepend Z zero char blocks to the encoded string

        Decoding of a string `s`:

        1. count the number Z of leading zero char blocks in `s` and strip them (default: count zero chars and strip them)
        2. decode `s` as `bases.encoding.simple.SimpleBaseEncoding` would
        3. prepend the minimum number of zero bytes necessary to make the decoded bytestring length an integral multiple of `block_nbytes`
        4. prepend Z zero byte blocks to the decoded bytestring

    """

    _simple_encoding: SimpleBaseEncoding
    _block_nbytes: int
    _block_nchars: int

    def __init__(self, alphabet: Union[str, range, Alphabet], *,
                 case_sensitive: Optional[bool] = None,
                 block_nbytes: int = 1,
                 block_nchars: int = 1):
        """
            Creates a zero-padded encoding over the given alphabet.

            The block sizes are type-checked with `validate`, the alphabet is set up by the
            `BaseEncoding` constructor, and the block sizes are then checked against each other
            (see `ZeropadBaseEncoding.max_block_nbytes` and `ZeropadBaseEncoding.max_block_nchars`).

            Raises `ValueError` if a block size is not positive or exceeds the maximum
            allowed for the other block size.
        """
        validate(block_nbytes, int)
        validate(block_nchars, int)
        super().__init__(alphabet, case_sensitive=case_sensitive)
        # the non-zero part of the data is encoded/decoded by a simple encoding on the same alphabet
        self._simple_encoding = SimpleBaseEncoding(self.alphabet)
        self._block_nbytes = block_nbytes
        self._block_nchars = block_nchars
        self.__validate_init()

    def __validate_init(self) -> None:
        """
            Checks that the two block sizes are mutually compatible for this base.

            Raises `ValueError` if either block size is larger than the maximum allowed
            by the other one (or is not positive, as reported by the static methods).
        """
        base = self.base
        block_nbytes = self.block_nbytes
        block_nchars = self.block_nchars
        max_block_nbytes = ZeropadBaseEncoding.max_block_nbytes(base, block_nchars)
        max_block_nchars = ZeropadBaseEncoding.max_block_nchars(base, block_nbytes)
        if block_nchars > max_block_nchars:
            raise ValueError(f"Number of characters allowed per zero-padding block is too large: "
                             f"the maximum for base = {base} and block_nbytes = {block_nbytes} is "
                             f"block_nchars = {max_block_nchars}")
        if block_nbytes > max_block_nbytes:
            raise ValueError(f"Number of bytes allowed per zero-padding block is too large: "
                             f"the maximum for base = {base} and block_nchars = {block_nchars} is "
                             f"block_nbytes = {max_block_nbytes}")

    @staticmethod
    def max_block_nchars(base: int, block_nbytes: int) -> int:
        """
            Returns the maximum integer value for `block_nchars` such that:

            ```py
                256**block_nbytes > base**(block_nchars-1)
            ```

            The result is computed with exact integer arithmetic, so it is not affected by
            floating-point rounding when `256**block_nbytes` is an exact power of `base`.

            Raises `ValueError` if `base` is less than 2 or `block_nbytes` is not positive.
        """
        validate(base, int)
        validate(block_nbytes, int)
        if base <= 1:
            raise ValueError("Base must be >= 2.")
        if block_nbytes <= 0:
            raise ValueError("Number of bytes per zero-padding block must be positive.")
        bound = 256**block_nbytes
        # Equivalently: number of base digits needed to write the largest block_nbytes-byte value.
        nchars = 1  # always valid, since base**0 == 1 < bound
        char_capacity = base  # invariant: char_capacity == base**nchars
        while char_capacity < bound:
            # base**nchars < bound means that nchars+1 still satisfies the inequality
            nchars += 1
            char_capacity *= base
        return nchars

    @staticmethod
    def max_block_nbytes(base: int, block_nchars: int) -> int:
        """
            Returns the maximum integer value for `block_nbytes` such that:

            ```py
                base**block_nchars > 256**(block_nbytes-1)
            ```

            The result is computed with exact integer arithmetic, so it is not affected by
            floating-point rounding when `base**block_nchars` is an exact power of 256.

            Raises `ValueError` if `base` is less than 2 or `block_nchars` is not positive.
        """
        validate(base, int)
        validate(block_nchars, int)
        if base <= 1:
            raise ValueError("Base must be >= 2.")
        if block_nchars <= 0:
            raise ValueError("Number of chars per zero-padding block must be positive.")
        bound = base**block_nchars
        # Equivalently: number of bytes needed to hold the largest block_nchars-digit value.
        nbytes = 1  # always valid, since 256**0 == 1 < bound
        byte_capacity = 256  # invariant: byte_capacity == 256**nbytes
        while byte_capacity < bound:
            # 256**nbytes < bound means that nbytes+1 still satisfies the inequality
            nbytes += 1
            byte_capacity *= 256
        return nbytes

    @property
    def block_nbytes(self) -> int:
        """
            Number of bytes in a block.
        """
        return self._block_nbytes

    @property
    def block_nchars(self) -> int:
        """
            Number of characters in a block.
        """
        return self._block_nchars

    def _canonical_bytes(self, b: bytes) -> bytes:
        """
            Validates `b` (via `_validate_bytes`) and left-pads it with zero bytes
            so that its length is an integral multiple of `block_nbytes`.
        """
        self._validate_bytes(b)
        block_nbytes = self.block_nbytes
        extra_bytes = len(b)%block_nbytes
        if extra_bytes == 0:
            return b
        return b"\x00"*(block_nbytes-extra_bytes)+b

    def _canonical_string(self, s: str) -> str:
        """
            Validates `s` (via `_validate_string`) and left-pads it with zero chars
            so that its length is an integral multiple of `block_nchars`.
        """
        self._validate_string(s)
        block_nchars = self.block_nchars
        extra_chars = len(s)%block_nchars
        if extra_chars == 0:
            return s
        return self.zero_char*(block_nchars-extra_chars)+s

    def _encode(self, b: bytes) -> str:
        """
            Encodes `b`: the leading zero bytes are stripped, the remainder is encoded by the
            simple encoding and padded to whole char blocks, then one zero char block is
            prepended for every whole block of zero bytes that was stripped.
        """
        b = self._canonical_bytes(b)
        block_nbytes = self.block_nbytes
        block_nchars = self.block_nchars
        zero_char = self.zero_char
        # strip leading zero bytes
        b_stripped = b.lstrip(b"\x00")
        # compute simple base encoding
        s = self._simple_encoding.encode(b_stripped)
        # pad simple base encoding to integral multiple of block char size
        extra_chars = len(s)%block_nchars
        if extra_chars != 0:
            s = zero_char*(block_nchars-extra_chars)+s
        # count leading zero blocks (only whole blocks: a partial block belongs to the first non-zero block)
        num_zero_blocks = (len(b)-len(b_stripped))//block_nbytes
        # return zero-padded base encoding
        s = zero_char*num_zero_blocks*block_nchars+s
        return s

    def _decode(self, s: str) -> bytes:
        """
            Decodes `s`: the leading zero chars are stripped, the remainder is decoded by the
            simple encoding and padded to whole byte blocks, then one zero byte block is
            prepended for every whole block of zero chars that was stripped.
        """
        s = self._canonical_string(s)
        block_nbytes = self.block_nbytes
        block_nchars = self.block_nchars
        # strip leading zero chars
        s_stripped = s.lstrip(self.zero_char)
        # compute simple base decoding
        b = self._simple_encoding.decode(s_stripped)
        # pad simple base decoding to integral multiple of block byte size
        extra_bytes = len(b)%block_nbytes
        if extra_bytes != 0:
            b = b"\x00"*(block_nbytes-extra_bytes)+b
        # compute leading zero blocks (only whole blocks: a partial block belongs to the first non-zero block)
        num_zero_blocks = (len(s)-len(s_stripped))//block_nchars
        # return zero-padded base decoding
        b = b"\x00"*num_zero_blocks*block_nbytes+b
        return b

    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        """
            Returns the block-size options of this encoding as a mapping with keys
            `"block_nbytes"` and `"block_nchars"`.

            If `skip_defaults` is true, options whose value is the default (1) are omitted.
        """
        validate(skip_defaults, bool)
        options: Dict[str, Any] = {}
        if not skip_defaults or self.block_nbytes != 1:
            options["block_nbytes"] = self.block_nbytes
        if not skip_defaults or self.block_nchars != 1:
            options["block_nchars"] = self.block_nchars
        return options

Classes

class ZeropadBaseEncoding (alphabet: Union[str, range, Alphabet], *, case_sensitive: Optional[bool] = None, block_nbytes: int = 1, block_nchars: int = 1)

Zero-padded base encodings. Similar to SimpleBaseEncoding, but additionally:

  • preserves leading zeros
  • optionally enforces a fixed block size for encoded strings and decoded bytestrings

Constructor options:

  • block_nbytes: int = 1 number of bytes in a block (for decoded bytestrings)
  • block_nchars: int = 1 number of chars in a block (for encoded strings)

The static method ZeropadBaseEncoding.max_block_nchars() (resp. ZeropadBaseEncoding.max_block_nbytes()) gives the maximum block size in chars (resp. bytes) that can be used for a given block size in bytes (resp. chars). This is to ensure that encoding/decoding can always be performed unambiguously.

Encoding of a bytestring b:

  1. count the number Z of leading zero byte blocks in b and strip them (default: count zero bytes and strip them)
  2. encode b as SimpleBaseEncoding would
  3. prepend the minimum number of zero chars necessary to make the encoded string length an integral multiple of block_nchars
  4. prepend Z zero char blocks to the encoded string

Decoding of a string s:

  1. count the number Z of leading zero char blocks in s and strip them (default: count zero chars and strip them)
  2. decode s as SimpleBaseEncoding would
  3. prepend the minimum number of zero bytes necessary to make the decoded bytestring length an integral multiple of block_nbytes
  4. prepend Z zero byte blocks to the decoded bytestring
Expand source code
class ZeropadBaseEncoding(BaseEncoding):
    """
        Zero-padded base encodings. Similar to `bases.encoding.simple.SimpleBaseEncoding`, but additionally:

        - preserves leading zeros
        - optionally enforces a fixed block size for encoded strings and decoded bytestrings

        Constructor options:

        - `block_nbytes: int = 1` number of bytes in a block (for decoded bytestrings)
        - `block_nchars: int = 1` number of chars in a block (for encoded strings)

        The static method `ZeropadBaseEncoding.max_block_nchars` (resp. `ZeropadBaseEncoding.max_block_nbytes`) gives the
        maximum block size in chars (resp. bytes) that can be used for a given block size in bytes (resp. chars).
        This is to ensure that encoding/decoding can always be performed unambiguously.

        Encoding of a bytestring `b`:

        1. count the number Z of leading zero byte blocks in `b` and strip them (default: count zero bytes and strip them)
        2. encode `b` as `bases.encoding.simple.SimpleBaseEncoding` would
        3. prepend the minimum number of zero chars necessary to make the encoded string length an integral multiple of `block_nchars`
        4. prepend Z zero char blocks to the encoded string

        Decoding of a string `s`:

        1. count the number Z of leading zero char blocks in `s` and strip them (default: count zero chars and strip them)
        2. decode `s` as `bases.encoding.simple.SimpleBaseEncoding` would
        3. prepend the minimum number of zero bytes necessary to make the decoded bytestring length an integral multiple of `block_nbytes`
        4. prepend Z zero byte blocks to the decoded bytestring

    """

    _simple_encoding: SimpleBaseEncoding
    _block_nbytes: int
    _block_nchars: int

    def __init__(self, alphabet: Union[str, range, Alphabet], *,
                 case_sensitive: Optional[bool] = None,
                 block_nbytes: int = 1,
                 block_nchars: int = 1):
        """
            Creates a zero-padded encoding over the given alphabet.

            The block sizes are type-checked with `validate`, the alphabet is set up by the
            `BaseEncoding` constructor, and the block sizes are then checked against each other
            (see `ZeropadBaseEncoding.max_block_nbytes` and `ZeropadBaseEncoding.max_block_nchars`).

            Raises `ValueError` if a block size is not positive or exceeds the maximum
            allowed for the other block size.
        """
        validate(block_nbytes, int)
        validate(block_nchars, int)
        super().__init__(alphabet, case_sensitive=case_sensitive)
        # the non-zero part of the data is encoded/decoded by a simple encoding on the same alphabet
        self._simple_encoding = SimpleBaseEncoding(self.alphabet)
        self._block_nbytes = block_nbytes
        self._block_nchars = block_nchars
        self.__validate_init()

    def __validate_init(self) -> None:
        """
            Checks that the two block sizes are mutually compatible for this base.

            Raises `ValueError` if either block size is larger than the maximum allowed
            by the other one (or is not positive, as reported by the static methods).
        """
        base = self.base
        block_nbytes = self.block_nbytes
        block_nchars = self.block_nchars
        max_block_nbytes = ZeropadBaseEncoding.max_block_nbytes(base, block_nchars)
        max_block_nchars = ZeropadBaseEncoding.max_block_nchars(base, block_nbytes)
        if block_nchars > max_block_nchars:
            raise ValueError(f"Number of characters allowed per zero-padding block is too large: "
                             f"the maximum for base = {base} and block_nbytes = {block_nbytes} is "
                             f"block_nchars = {max_block_nchars}")
        if block_nbytes > max_block_nbytes:
            raise ValueError(f"Number of bytes allowed per zero-padding block is too large: "
                             f"the maximum for base = {base} and block_nchars = {block_nchars} is "
                             f"block_nbytes = {max_block_nbytes}")

    @staticmethod
    def max_block_nchars(base: int, block_nbytes: int) -> int:
        """
            Returns the maximum integer value for `block_nchars` such that:

            ```py
                256**block_nbytes > base**(block_nchars-1)
            ```

            The result is computed with exact integer arithmetic, so it is not affected by
            floating-point rounding when `256**block_nbytes` is an exact power of `base`.

            Raises `ValueError` if `base` is less than 2 or `block_nbytes` is not positive.
        """
        validate(base, int)
        validate(block_nbytes, int)
        if base <= 1:
            raise ValueError("Base must be >= 2.")
        if block_nbytes <= 0:
            raise ValueError("Number of bytes per zero-padding block must be positive.")
        bound = 256**block_nbytes
        # Equivalently: number of base digits needed to write the largest block_nbytes-byte value.
        nchars = 1  # always valid, since base**0 == 1 < bound
        char_capacity = base  # invariant: char_capacity == base**nchars
        while char_capacity < bound:
            # base**nchars < bound means that nchars+1 still satisfies the inequality
            nchars += 1
            char_capacity *= base
        return nchars

    @staticmethod
    def max_block_nbytes(base: int, block_nchars: int) -> int:
        """
            Returns the maximum integer value for `block_nbytes` such that:

            ```py
                base**block_nchars > 256**(block_nbytes-1)
            ```

            The result is computed with exact integer arithmetic, so it is not affected by
            floating-point rounding when `base**block_nchars` is an exact power of 256.

            Raises `ValueError` if `base` is less than 2 or `block_nchars` is not positive.
        """
        validate(base, int)
        validate(block_nchars, int)
        if base <= 1:
            raise ValueError("Base must be >= 2.")
        if block_nchars <= 0:
            raise ValueError("Number of chars per zero-padding block must be positive.")
        bound = base**block_nchars
        # Equivalently: number of bytes needed to hold the largest block_nchars-digit value.
        nbytes = 1  # always valid, since 256**0 == 1 < bound
        byte_capacity = 256  # invariant: byte_capacity == 256**nbytes
        while byte_capacity < bound:
            # 256**nbytes < bound means that nbytes+1 still satisfies the inequality
            nbytes += 1
            byte_capacity *= 256
        return nbytes

    @property
    def block_nbytes(self) -> int:
        """
            Number of bytes in a block.
        """
        return self._block_nbytes

    @property
    def block_nchars(self) -> int:
        """
            Number of characters in a block.
        """
        return self._block_nchars

    def _canonical_bytes(self, b: bytes) -> bytes:
        """
            Validates `b` (via `_validate_bytes`) and left-pads it with zero bytes
            so that its length is an integral multiple of `block_nbytes`.
        """
        self._validate_bytes(b)
        block_nbytes = self.block_nbytes
        extra_bytes = len(b)%block_nbytes
        if extra_bytes == 0:
            return b
        return b"\x00"*(block_nbytes-extra_bytes)+b

    def _canonical_string(self, s: str) -> str:
        """
            Validates `s` (via `_validate_string`) and left-pads it with zero chars
            so that its length is an integral multiple of `block_nchars`.
        """
        self._validate_string(s)
        block_nchars = self.block_nchars
        extra_chars = len(s)%block_nchars
        if extra_chars == 0:
            return s
        return self.zero_char*(block_nchars-extra_chars)+s

    def _encode(self, b: bytes) -> str:
        """
            Encodes `b`: the leading zero bytes are stripped, the remainder is encoded by the
            simple encoding and padded to whole char blocks, then one zero char block is
            prepended for every whole block of zero bytes that was stripped.
        """
        b = self._canonical_bytes(b)
        block_nbytes = self.block_nbytes
        block_nchars = self.block_nchars
        zero_char = self.zero_char
        # strip leading zero bytes
        b_stripped = b.lstrip(b"\x00")
        # compute simple base encoding
        s = self._simple_encoding.encode(b_stripped)
        # pad simple base encoding to integral multiple of block char size
        extra_chars = len(s)%block_nchars
        if extra_chars != 0:
            s = zero_char*(block_nchars-extra_chars)+s
        # count leading zero blocks (only whole blocks: a partial block belongs to the first non-zero block)
        num_zero_blocks = (len(b)-len(b_stripped))//block_nbytes
        # return zero-padded base encoding
        s = zero_char*num_zero_blocks*block_nchars+s
        return s

    def _decode(self, s: str) -> bytes:
        """
            Decodes `s`: the leading zero chars are stripped, the remainder is decoded by the
            simple encoding and padded to whole byte blocks, then one zero byte block is
            prepended for every whole block of zero chars that was stripped.
        """
        s = self._canonical_string(s)
        block_nbytes = self.block_nbytes
        block_nchars = self.block_nchars
        # strip leading zero chars
        s_stripped = s.lstrip(self.zero_char)
        # compute simple base decoding
        b = self._simple_encoding.decode(s_stripped)
        # pad simple base decoding to integral multiple of block byte size
        extra_bytes = len(b)%block_nbytes
        if extra_bytes != 0:
            b = b"\x00"*(block_nbytes-extra_bytes)+b
        # compute leading zero blocks (only whole blocks: a partial block belongs to the first non-zero block)
        num_zero_blocks = (len(s)-len(s_stripped))//block_nchars
        # return zero-padded base decoding
        b = b"\x00"*num_zero_blocks*block_nbytes+b
        return b

    def options(self, skip_defaults: bool = False) -> Mapping[str, Any]:
        """
            Returns the block-size options of this encoding as a mapping with keys
            `"block_nbytes"` and `"block_nchars"`.

            If `skip_defaults` is true, options whose value is the default (1) are omitted.
        """
        validate(skip_defaults, bool)
        options: Dict[str, Any] = {}
        if not skip_defaults or self.block_nbytes != 1:
            options["block_nbytes"] = self.block_nbytes
        if not skip_defaults or self.block_nchars != 1:
            options["block_nchars"] = self.block_nchars
        return options

Ancestors

Static methods

def max_block_nbytes(base: int, block_nchars: int) ‑> int

Returns the maximum integer value for block_nbytes such that:

    base**block_nchars > 256**(block_nbytes-1)
Expand source code
@staticmethod
def max_block_nbytes(base: int, block_nchars: int) -> int:
    """
        Returns the maximum integer value for `block_nbytes` such that:

        ```py
            base**block_nchars > 256**(block_nbytes-1)
        ```

        The result is computed with exact integer arithmetic, so it is not affected by
        floating-point rounding when `base**block_nchars` is an exact power of 256.

        Raises `ValueError` if `base` is less than 2 or `block_nchars` is not positive.
    """
    validate(base, int)
    validate(block_nchars, int)
    if base <= 1:
        raise ValueError("Base must be >= 2.")
    if block_nchars <= 0:
        raise ValueError("Number of chars per zero-padding block must be positive.")
    bound = base**block_nchars
    # Equivalently: number of bytes needed to hold the largest block_nchars-digit value.
    nbytes = 1  # always valid, since 256**0 == 1 < bound
    byte_capacity = 256  # invariant: byte_capacity == 256**nbytes
    while byte_capacity < bound:
        # 256**nbytes < bound means that nbytes+1 still satisfies the inequality
        nbytes += 1
        byte_capacity *= 256
    return nbytes
def max_block_nchars(base: int, block_nbytes: int) ‑> int

Returns the maximum integer value for block_nchars such that:

    256**block_nbytes > base**(block_nchars-1)
Expand source code
@staticmethod
def max_block_nchars(base: int, block_nbytes: int) -> int:
    """
        Returns the maximum integer value for `block_nchars` such that:

        ```py
            256**block_nbytes > base**(block_nchars-1)
        ```

        The result is computed with exact integer arithmetic, so it is not affected by
        floating-point rounding when `256**block_nbytes` is an exact power of `base`.

        Raises `ValueError` if `base` is less than 2 or `block_nbytes` is not positive.
    """
    validate(base, int)
    validate(block_nbytes, int)
    if base <= 1:
        raise ValueError("Base must be >= 2.")
    if block_nbytes <= 0:
        raise ValueError("Number of bytes per zero-padding block must be positive.")
    bound = 256**block_nbytes
    # Equivalently: number of base digits needed to write the largest block_nbytes-byte value.
    nchars = 1  # always valid, since base**0 == 1 < bound
    char_capacity = base  # invariant: char_capacity == base**nchars
    while char_capacity < bound:
        # base**nchars < bound means that nchars+1 still satisfies the inequality
        nchars += 1
        char_capacity *= base
    return nchars

Instance variables

var block_nbytes : int

Number of bytes in a block.

Expand source code
@property
def block_nbytes(self) -> int:
    """
        Number of bytes in a block.

        Read-only accessor for the value stored in `_block_nbytes`.
    """
    return self._block_nbytes
var block_nchars : int

Number of characters in a block.

Expand source code
@property
def block_nchars(self) -> int:
    """
        Number of characters in a block.

        Read-only accessor for the value stored in `_block_nchars`.
    """
    return self._block_nchars

Inherited members