Module redvox.common.sensor_data

Defines generic sensor data and data for API-independent analysis. All timestamps are integers in microseconds unless otherwise stated.

Expand source code
"""
Defines generic sensor data and data for API-independent analysis
all timestamps are integers in microseconds unless otherwise stated
"""
import enum
from typing import List, Union

import numpy as np
import pandas as pd

import redvox.common.date_time_utils as dtu
from redvox.common import offset_model as om
from redvox.common.errors import RedVoxExceptions
from redvox.common.gap_and_pad_utils import calc_evenly_sampled_timestamps
from redvox.api1000.wrapped_redvox_packet.station_information import (
    NetworkType,
    PowerState,
    CellServiceState,
)
from redvox.api1000.wrapped_redvox_packet.sensors.location import LocationProvider
from redvox.api1000.wrapped_redvox_packet.sensors.image import ImageCodec
from redvox.api1000.wrapped_redvox_packet.sensors.audio import AudioCodec

# names of columns whose values cannot be interpolated
NON_INTERPOLATED_COLUMNS = [
    "compressed_audio",
    "image",
]
# names of columns that hold non-numeric values but can still be interpolated
NON_NUMERIC_COLUMNS = [
    "location_provider",
    "image_codec",
    "audio_codec",
    "network_type",
    "power_state",
    "cell_service",
]


class SensorType(enum.Enum):
    """
    Enumeration of the kinds of sensors that data can be read from.
    The trailing comment on each member gives the units of its data.
    """

    UNKNOWN_SENSOR = 0  # unknown sensor
    ACCELEROMETER = 1  # meters/second^2
    AMBIENT_TEMPERATURE = 2  # degrees Celsius
    AUDIO = 3  # normalized counts
    COMPRESSED_AUDIO = 4  # bytes (codec specific)
    GRAVITY = 5  # meters/second^2
    GYROSCOPE = 6  # radians/second
    IMAGE = 7  # bytes (codec specific)
    LIGHT = 8  # lux
    LINEAR_ACCELERATION = 9  # meters/second^2
    LOCATION = 10  # See standard
    MAGNETOMETER = 11  # microtesla
    ORIENTATION = 12  # radians
    PRESSURE = 13  # kilopascal
    PROXIMITY = 14  # on, off, cm
    RELATIVE_HUMIDITY = 15  # percentage
    ROTATION_VECTOR = 16  # Unitless
    INFRARED = 17  # this is proximity
    STATION_HEALTH = 18
    # battery charge and current level, phone internal temperature, network source and strength,
    # available RAM of the system, cell service status, amount of hard disk space left, power charging state
    BEST_LOCATION = 19  # See standard

    @staticmethod
    def type_from_str(type_str: str) -> "SensorType":
        """
        looks up the sensor type matching a name or alias; the comparison ignores case
        :param type_str: name or alias of the sensor type
        :return: the matching sensor type, or UNKNOWN_SENSOR when the string is not recognized
        """
        # the table is built inside the method so it does not become an enum member
        aliases = {
            "audio": SensorType.AUDIO,
            "mic": SensorType.AUDIO,
            "microphone": SensorType.AUDIO,
            "accelerometer": SensorType.ACCELEROMETER,
            "accel": SensorType.ACCELEROMETER,
            "ambient_temperature": SensorType.AMBIENT_TEMPERATURE,
            "compressed_audio": SensorType.COMPRESSED_AUDIO,
            "gravity": SensorType.GRAVITY,
            "gyroscope": SensorType.GYROSCOPE,
            "gyro": SensorType.GYROSCOPE,
            "image": SensorType.IMAGE,
            "light": SensorType.LIGHT,
            "linear_acceleration": SensorType.LINEAR_ACCELERATION,
            "linear_accel": SensorType.LINEAR_ACCELERATION,
            "location": SensorType.LOCATION,
            "loc": SensorType.LOCATION,
            "best_location": SensorType.BEST_LOCATION,
            "best_loc": SensorType.BEST_LOCATION,
            "magnetometer": SensorType.MAGNETOMETER,
            "mag": SensorType.MAGNETOMETER,
            "orientation": SensorType.ORIENTATION,
            "pressure": SensorType.PRESSURE,
            "bar": SensorType.PRESSURE,
            "barometer": SensorType.PRESSURE,
            # infrared is reported as a proximity sensor
            "proximity": SensorType.PROXIMITY,
            "infrared": SensorType.PROXIMITY,
            "relative_humidity": SensorType.RELATIVE_HUMIDITY,
            "rotation_vector": SensorType.ROTATION_VECTOR,
        }
        normalized = type_str.lower()
        return aliases.get(normalized, SensorType.UNKNOWN_SENSOR)


class SensorData:
    """
    Generic SensorData class for API-independent analysis
    Properties:
        name: string, name of sensor
        type: SensorType, enumerated type of sensor
        data_df: dataframe of the sensor data; always has timestamps as the first column,
                    the other columns are the data fields
        sample_rate_hz: float, sample rate in Hz of the sensor, default np.nan, usually 1/sample_interval_s
        sample_interval_s: float, mean duration in seconds between samples, default np.nan, usually 1/sample_rate
        sample_interval_std_s: float, standard deviation in seconds between samples, default np.nan
        is_sample_rate_fixed: bool, True if sample rate is constant, default False
        timestamps_altered: bool, True if timestamps in the sensor have been altered from their original values
                            default False
        errors: RedVoxExceptions, container for the errors encountered by the sensor
    """

    def __init__(
            self,
            sensor_name: str,
            sensor_data: pd.DataFrame,
            sensor_type: SensorType = SensorType.UNKNOWN_SENSOR,
            sample_rate_hz: float = np.nan,
            sample_interval_s: float = np.nan,
            sample_interval_std_s: float = np.nan,
            is_sample_rate_fixed: bool = False,
            are_timestamps_altered: bool = False,
            calculate_stats: bool = False
    ):
        """
        initialize the sensor data with params; the data is always sorted by timestamps on creation
        :param sensor_name: name of the sensor
        :param sensor_type: enumerated type of the sensor, default SensorType.UNKNOWN_SENSOR
        :param sensor_data: dataframe with the timestamps and sensor data; first column is always the timestamps,
                            the other columns are the data channels in the sensor
        :param sample_rate_hz: sample rate in hz of the data
        :param sample_interval_s: sample interval in seconds of the data
        :param sample_interval_std_s: std dev of sample interval in seconds of the data
        :param is_sample_rate_fixed: if True, sample rate is constant for all data, default False
        :param are_timestamps_altered: if True, timestamps in the sensor have been altered from their
                                        original values, default False
        :param calculate_stats: if True, calculate sample_rate, sample_interval_s, and sample_interval_std_s
                                default False
        :raises AttributeError: if sensor_data does not have a column titled "timestamps"
        """
        if "timestamps" not in sensor_data.columns:
            raise AttributeError(
                'SensorData requires the data frame to contain a column titled "timestamps"'
            )
        self.name: str = sensor_name
        self.type: SensorType = sensor_type
        self.data_df: pd.DataFrame = sensor_data.infer_objects()
        self.sample_rate_hz: float = sample_rate_hz
        self.sample_interval_s: float = sample_interval_s
        self.sample_interval_std_s: float = sample_interval_std_s
        self.is_sample_rate_fixed: bool = is_sample_rate_fixed
        self.timestamps_altered: bool = are_timestamps_altered
        self.errors: RedVoxExceptions = RedVoxExceptions("Sensor")
        # both branches sort the data; only the first also recalculates the stats
        if calculate_stats:
            self.organize_and_update_stats()
        else:
            self.sort_by_data_timestamps()

    def print_errors(self):
        """
        prints errors to screen
        """
        self.errors.print()

    def is_sample_interval_invalid(self) -> bool:
        """
        :return: True if sample interval is np.nan or equal to 0.0
        """
        return np.isnan(self.sample_interval_s) or self.sample_interval_s == 0.0

    def organize_and_update_stats(self) -> "SensorData":
        """
        sorts the data by timestamps, then if the sample rate is not fixed, recalculates the sample rate, interval,
            and interval std dev.  If there is only one value, sets the sample rate, interval, and interval std dev
            to np.nan.  Updates the SensorData object with the new values
        :return: updated version of self
        """
        self.sort_by_data_timestamps()
        if not self.is_sample_rate_fixed:
            if self.num_samples() > 1:
                # timestamps are in microseconds; the stats are stored in seconds
                timestamp_diffs = np.diff(self.data_timestamps())
                self.sample_interval_s = dtu.microseconds_to_seconds(
                    float(np.mean(timestamp_diffs))
                )
                self.sample_interval_std_s = dtu.microseconds_to_seconds(
                    float(np.std(timestamp_diffs))
                )
                # avoid dividing by a nan or zero interval
                self.sample_rate_hz = (
                    np.nan
                    if self.is_sample_interval_invalid()
                    else 1 / self.sample_interval_s
                )
            else:
                self.sample_interval_s = np.nan
                self.sample_interval_std_s = np.nan
                self.sample_rate_hz = np.nan
        return self

    def append_data(
            self, new_data: pd.DataFrame, recalculate_stats: bool = False
    ) -> "SensorData":
        """
        append the new data to the dataframe, update the sensor's stats on demand if it doesn't have a fixed
            sample rate, then return the updated SensorData object
        :param new_data: Dataframe containing data to add to the sensor's dataframe
        :param recalculate_stats: if True and the sensor does not have a fixed sample rate, sort the timestamps,
                                    recalculate the sample rate, interval, and interval std dev, default False
        :return: the updated SensorData object
        """
        # DataFrame.append was removed in pandas 2.0; pd.concat gives the same result on every version
        self.data_df = pd.concat([self.data_df, new_data], ignore_index=True)
        if recalculate_stats and not self.is_sample_rate_fixed:
            self.organize_and_update_stats()
        return self

    def sensor_type_as_str(self) -> str:
        """
        gets the sensor type as a string
        :return: sensor type of the sensor as a string
        """
        return self.type.name

    def samples(self) -> np.ndarray:
        """
        gets the samples of dataframe
        :return: the data values of the dataframe as a numpy ndarray, one row per data channel
        """
        # skips the first two columns and transposes so that each row is one channel
        # NOTE(review): presumably the skipped columns are the timestamps and unaltered timestamps -- confirm
        return self.data_df.iloc[:, 2:].T.to_numpy()

    def get_data_channel(self, channel_name: str) -> Union[np.ndarray, List[str]]:
        """
        gets the data channel specified, raises an error and lists valid fields if channel_name is not in the dataframe
        :param channel_name: the name of the channel to get data for
        :return: the data values of the channel as a numpy array or list of strings for enumerated channels
        :raises ValueError: if channel_name is not a column of the dataframe
        """
        if channel_name not in self.data_df.columns:
            raise ValueError(
                f"WARNING: {channel_name} does not exist; try one of {self.data_channels()}"
            )
        # enumerated channels are converted to the names of their enum members
        if channel_name == "location_provider":
            return [LocationProvider(c).name for c in self.data_df[channel_name]]
        elif channel_name == "image_codec":
            return [ImageCodec(c).name for c in self.data_df[channel_name]]
        elif channel_name == "audio_codec":
            return [AudioCodec(c).name for c in self.data_df[channel_name]]
        elif channel_name == "network_type":
            return [NetworkType(c).name for c in self.data_df[channel_name]]
        elif channel_name == "power_state":
            return [PowerState(c).name for c in self.data_df[channel_name]]
        elif channel_name == "cell_service":
            return [CellServiceState(c).name for c in self.data_df[channel_name]]
        return self.data_df[channel_name].to_numpy()

    def get_valid_data_channel_values(self, channel_name: str) -> np.ndarray:
        """
        gets all non-nan values from the channel specified
        :param channel_name: the name of the channel to get data for
        :return: non-nan values of the channel as a numpy array
        :raises ValueError: if channel_name is not a column of the dataframe
        """
        # enumerated channels come back as a list of strings, so convert to an array before masking
        channel_data = np.asarray(self.get_data_channel(channel_name))
        # pd.isnull also works on string and object arrays, where np.isnan raises a TypeError
        return channel_data[~pd.isnull(channel_data)]

    def data_timestamps(self) -> np.ndarray:
        """
        :return: the timestamps as a numpy array of floats
        """
        return self.data_df["timestamps"].to_numpy(dtype=float)

    def unaltered_data_timestamps(self) -> np.ndarray:
        """
        :return: the unaltered timestamps as a numpy array of floats
        """
        return self.data_df["unaltered_timestamps"].to_numpy(dtype=float)

    def first_data_timestamp(self) -> float:
        """
        :return: timestamp of the first data point
        """
        return self.data_df["timestamps"].iloc[0]

    def last_data_timestamp(self) -> float:
        """
        :return: timestamp of the last data point
        """
        return self.data_df["timestamps"].iloc[-1]

    def num_samples(self) -> int:
        """
        :return: the number of rows (samples) in the dataframe
        """
        return self.data_df.shape[0]

    def data_channels(self) -> List[str]:
        """
        :return: a list of the names of the columns (data channels) of the dataframe
        """
        return self.data_df.columns.to_list()

    def update_data_timestamps(self, offset_model: om.OffsetModel, use_model_function: bool = True):
        """
        updates the timestamps of the data points, refreshes the sample stats and marks the timestamps as altered
        :param offset_model: model used to update the timestamps
        :param use_model_function: if True, use the offset model's correction function to correct time,
                                    otherwise use best offset (model's intercept value).  default True
        """
        # sample interval in microseconds, scaled by the model's slope when the model function is used
        slope = dtu.seconds_to_microseconds(self.sample_interval_s) * (1 + offset_model.slope) \
            if use_model_function else dtu.seconds_to_microseconds(self.sample_interval_s)
        if self.type == SensorType.AUDIO:
            # use the model to update the first timestamp or add the best offset (model's intercept value),
            # then rebuild the timestamps from that first timestamp, the number of samples and the slope
            self.data_df["timestamps"] = \
                calc_evenly_sampled_timestamps(offset_model.update_time(self.first_data_timestamp(),
                                                                        use_model_function),
                                               self.num_samples(),
                                               slope)
        else:
            self.data_df["timestamps"] = offset_model.update_timestamps(self.data_timestamps(), use_model_function)
        time_diffs = np.floor(np.diff(self.data_timestamps()))
        # the stats are only refreshed when there is more than one interval between samples
        if len(time_diffs) > 1:
            self.sample_interval_s = dtu.microseconds_to_seconds(slope)
            if self.sample_interval_s > 0:
                self.sample_rate_hz = 1 / self.sample_interval_s
                self.sample_interval_std_s = dtu.microseconds_to_seconds(np.std(time_diffs))
        self.timestamps_altered = True

    def sort_by_data_timestamps(self, ascending: bool = True):
        """
        sorts the data based on timestamps
        :param ascending: if True, timestamps are sorted in ascending order
        """
        self.data_df = self.data_df.sort_values("timestamps", ascending=ascending)

    def interpolate(self, interpolate_timestamp: float, first_point: int, second_point: int = 0,
                    copy: bool = True) -> pd.Series:
        """
        interpolates two points at the intercept value.  the two points must be consecutive in the dataframe
        numeric columns are linearly interpolated between the two points; non-numeric columns take the values
        of the point closest in time; columns that cannot be interpolated are left out of the result
        :param interpolate_timestamp: timestamp to interpolate other values
        :param first_point: index of first point
        :param second_point: delta to second point, default 0 (same as first point)
        :param copy: if True, copies the values of the first point, default True
        :return: pd.Series of interpolated points
        """
        numeric_columns = [col for col in self.data_df.columns
                           if col not in NON_INTERPOLATED_COLUMNS + NON_NUMERIC_COLUMNS]
        non_numeric_columns = [col for col in self.data_df.columns if col in NON_NUMERIC_COLUMNS]
        start_point = self.data_df.iloc[first_point]
        numeric_start = start_point[numeric_columns]
        non_numeric_start = start_point[non_numeric_columns]
        if not copy and second_point:
            end_point = self.data_df.iloc[first_point + second_point]
            numeric_end = end_point[numeric_columns]
            non_numeric_end = end_point[non_numeric_columns]
            # non-numeric values come from whichever point is closest in time; ties go to the first point
            first_closer = \
                np.abs(start_point["timestamps"] - interpolate_timestamp) \
                <= np.abs(end_point["timestamps"] - interpolate_timestamp)
            non_numeric_diff = non_numeric_start if first_closer else non_numeric_end
            time_delta = end_point["timestamps"] - start_point["timestamps"]
            if time_delta == 0:
                # both points share a timestamp, so there is no slope to follow; use the first point
                numeric_diff = numeric_start.copy()
            else:
                # value = start + rate of change * time elapsed since the start point
                rate_of_change = (numeric_end - numeric_start) / time_delta
                numeric_diff = \
                    rate_of_change * (interpolate_timestamp - start_point["timestamps"]) + numeric_start
        else:
            # work on a copy so the assignment below never touches the source row
            numeric_diff = numeric_start.copy()
            non_numeric_diff = non_numeric_start
        numeric_diff["timestamps"] = interpolate_timestamp
        return pd.concat([numeric_diff, non_numeric_diff])

Classes

class SensorData (sensor_name: str, sensor_data: pandas.core.frame.DataFrame, sensor_type: SensorType = SensorType.UNKNOWN_SENSOR, sample_rate_hz: float = nan, sample_interval_s: float = nan, sample_interval_std_s: float = nan, is_sample_rate_fixed: bool = False, are_timestamps_altered: bool = False, calculate_stats: bool = False)

Generic SensorData class for API-independent analysis

Properties

name: string, name of sensor; type: SensorType, enumerated type of sensor; data_df: dataframe of the sensor data; always has timestamps as the first column, the other columns are the data fields; sample_rate_hz: float, sample rate in Hz of the sensor, default np.nan, usually 1/sample_interval_s; sample_interval_s: float, mean duration in seconds between samples, default np.nan, usually 1/sample_rate; sample_interval_std_s: float, standard deviation in seconds between samples, default np.nan; is_sample_rate_fixed: bool, True if sample rate is constant, default False; timestamps_altered: bool, True if timestamps in the sensor have been altered from their original values, default False

initialize the sensor data with params :param sensor_name: name of the sensor :param sensor_type: enumerated type of the sensor, default SensorType.UNKNOWN_SENSOR :param sensor_data: dataframe with the timestamps and sensor data; first column is always the timestamps, the other columns are the data channels in the sensor :param sample_rate_hz: sample rate in hz of the data :param sample_interval_s: sample interval in seconds of the data :param sample_interval_std_s: std dev of sample interval in seconds of the data :param is_sample_rate_fixed: if True, sample rate is constant for all data, default False :param are_timestamps_altered: if True, timestamps in the sensor have been altered from their original values, default False :param calculate_stats: if True, calculate sample_rate, sample_interval_s, and sample_interval_std_s default False

Expand source code
class SensorData:
    """
    Generic SensorData class for API-independent analysis
    Properties:
        name: string, name of sensor
        type: SensorType, enumerated type of sensor
        data_df: dataframe of the sensor data; always has timestamps as the first column,
                    the other columns are the data fields
        sample_rate_hz: float, sample rate in Hz of the sensor, default np.nan, usually 1/sample_interval_s
        sample_interval_s: float, mean duration in seconds between samples, default np.nan, usually 1/sample_rate
        sample_interval_std_s: float, standard deviation in seconds between samples, default np.nan
        is_sample_rate_fixed: bool, True if sample rate is constant, default False
        timestamps_altered: bool, True if timestamps in the sensor have been altered from their original values
                            default False
        errors: RedVoxExceptions, container for the errors encountered by the sensor
    """

    def __init__(
            self,
            sensor_name: str,
            sensor_data: pd.DataFrame,
            sensor_type: SensorType = SensorType.UNKNOWN_SENSOR,
            sample_rate_hz: float = np.nan,
            sample_interval_s: float = np.nan,
            sample_interval_std_s: float = np.nan,
            is_sample_rate_fixed: bool = False,
            are_timestamps_altered: bool = False,
            calculate_stats: bool = False
    ):
        """
        initialize the sensor data with params; the data is always sorted by timestamps on creation
        :param sensor_name: name of the sensor
        :param sensor_type: enumerated type of the sensor, default SensorType.UNKNOWN_SENSOR
        :param sensor_data: dataframe with the timestamps and sensor data; first column is always the timestamps,
                            the other columns are the data channels in the sensor
        :param sample_rate_hz: sample rate in hz of the data
        :param sample_interval_s: sample interval in seconds of the data
        :param sample_interval_std_s: std dev of sample interval in seconds of the data
        :param is_sample_rate_fixed: if True, sample rate is constant for all data, default False
        :param are_timestamps_altered: if True, timestamps in the sensor have been altered from their
                                        original values, default False
        :param calculate_stats: if True, calculate sample_rate, sample_interval_s, and sample_interval_std_s
                                default False
        :raises AttributeError: if sensor_data does not have a column titled "timestamps"
        """
        if "timestamps" not in sensor_data.columns:
            raise AttributeError(
                'SensorData requires the data frame to contain a column titled "timestamps"'
            )
        self.name: str = sensor_name
        self.type: SensorType = sensor_type
        self.data_df: pd.DataFrame = sensor_data.infer_objects()
        self.sample_rate_hz: float = sample_rate_hz
        self.sample_interval_s: float = sample_interval_s
        self.sample_interval_std_s: float = sample_interval_std_s
        self.is_sample_rate_fixed: bool = is_sample_rate_fixed
        self.timestamps_altered: bool = are_timestamps_altered
        self.errors: RedVoxExceptions = RedVoxExceptions("Sensor")
        # both branches sort the data; only the first also recalculates the stats
        if calculate_stats:
            self.organize_and_update_stats()
        else:
            self.sort_by_data_timestamps()

    def print_errors(self):
        """
        prints errors to screen
        """
        self.errors.print()

    def is_sample_interval_invalid(self) -> bool:
        """
        :return: True if sample interval is np.nan or equal to 0.0
        """
        return np.isnan(self.sample_interval_s) or self.sample_interval_s == 0.0

    def organize_and_update_stats(self) -> "SensorData":
        """
        sorts the data by timestamps, then if the sample rate is not fixed, recalculates the sample rate, interval,
            and interval std dev.  If there is only one value, sets the sample rate, interval, and interval std dev
            to np.nan.  Updates the SensorData object with the new values
        :return: updated version of self
        """
        self.sort_by_data_timestamps()
        if not self.is_sample_rate_fixed:
            if self.num_samples() > 1:
                # timestamps are in microseconds; the stats are stored in seconds
                timestamp_diffs = np.diff(self.data_timestamps())
                self.sample_interval_s = dtu.microseconds_to_seconds(
                    float(np.mean(timestamp_diffs))
                )
                self.sample_interval_std_s = dtu.microseconds_to_seconds(
                    float(np.std(timestamp_diffs))
                )
                # avoid dividing by a nan or zero interval
                self.sample_rate_hz = (
                    np.nan
                    if self.is_sample_interval_invalid()
                    else 1 / self.sample_interval_s
                )
            else:
                self.sample_interval_s = np.nan
                self.sample_interval_std_s = np.nan
                self.sample_rate_hz = np.nan
        return self

    def append_data(
            self, new_data: pd.DataFrame, recalculate_stats: bool = False
    ) -> "SensorData":
        """
        append the new data to the dataframe, update the sensor's stats on demand if it doesn't have a fixed
            sample rate, then return the updated SensorData object
        :param new_data: Dataframe containing data to add to the sensor's dataframe
        :param recalculate_stats: if True and the sensor does not have a fixed sample rate, sort the timestamps,
                                    recalculate the sample rate, interval, and interval std dev, default False
        :return: the updated SensorData object
        """
        # DataFrame.append was removed in pandas 2.0; pd.concat gives the same result on every version
        self.data_df = pd.concat([self.data_df, new_data], ignore_index=True)
        if recalculate_stats and not self.is_sample_rate_fixed:
            self.organize_and_update_stats()
        return self

    def sensor_type_as_str(self) -> str:
        """
        gets the sensor type as a string
        :return: sensor type of the sensor as a string
        """
        return self.type.name

    def samples(self) -> np.ndarray:
        """
        gets the samples of dataframe
        :return: the data values of the dataframe as a numpy ndarray, one row per data channel
        """
        # skips the first two columns and transposes so that each row is one channel
        # NOTE(review): presumably the skipped columns are the timestamps and unaltered timestamps -- confirm
        return self.data_df.iloc[:, 2:].T.to_numpy()

    def get_data_channel(self, channel_name: str) -> Union[np.ndarray, List[str]]:
        """
        gets the data channel specified, raises an error and lists valid fields if channel_name is not in the dataframe
        :param channel_name: the name of the channel to get data for
        :return: the data values of the channel as a numpy array or list of strings for enumerated channels
        :raises ValueError: if channel_name is not a column of the dataframe
        """
        if channel_name not in self.data_df.columns:
            raise ValueError(
                f"WARNING: {channel_name} does not exist; try one of {self.data_channels()}"
            )
        # enumerated channels are converted to the names of their enum members
        if channel_name == "location_provider":
            return [LocationProvider(c).name for c in self.data_df[channel_name]]
        elif channel_name == "image_codec":
            return [ImageCodec(c).name for c in self.data_df[channel_name]]
        elif channel_name == "audio_codec":
            return [AudioCodec(c).name for c in self.data_df[channel_name]]
        elif channel_name == "network_type":
            return [NetworkType(c).name for c in self.data_df[channel_name]]
        elif channel_name == "power_state":
            return [PowerState(c).name for c in self.data_df[channel_name]]
        elif channel_name == "cell_service":
            return [CellServiceState(c).name for c in self.data_df[channel_name]]
        return self.data_df[channel_name].to_numpy()

    def get_valid_data_channel_values(self, channel_name: str) -> np.ndarray:
        """
        gets all non-nan values from the channel specified
        :param channel_name: the name of the channel to get data for
        :return: non-nan values of the channel as a numpy array
        :raises ValueError: if channel_name is not a column of the dataframe
        """
        # enumerated channels come back as a list of strings, so convert to an array before masking
        channel_data = np.asarray(self.get_data_channel(channel_name))
        # pd.isnull also works on string and object arrays, where np.isnan raises a TypeError
        return channel_data[~pd.isnull(channel_data)]

    def data_timestamps(self) -> np.ndarray:
        """
        :return: the timestamps as a numpy array of floats
        """
        return self.data_df["timestamps"].to_numpy(dtype=float)

    def unaltered_data_timestamps(self) -> np.ndarray:
        """
        :return: the unaltered timestamps as a numpy array of floats
        """
        return self.data_df["unaltered_timestamps"].to_numpy(dtype=float)

    def first_data_timestamp(self) -> float:
        """
        :return: timestamp of the first data point
        """
        return self.data_df["timestamps"].iloc[0]

    def last_data_timestamp(self) -> float:
        """
        :return: timestamp of the last data point
        """
        return self.data_df["timestamps"].iloc[-1]

    def num_samples(self) -> int:
        """
        :return: the number of rows (samples) in the dataframe
        """
        return self.data_df.shape[0]

    def data_channels(self) -> List[str]:
        """
        :return: a list of the names of the columns (data channels) of the dataframe
        """
        return self.data_df.columns.to_list()

    def update_data_timestamps(self, offset_model: om.OffsetModel, use_model_function: bool = True):
        """
        updates the timestamps of the data points, refreshes the sample stats and marks the timestamps as altered
        :param offset_model: model used to update the timestamps
        :param use_model_function: if True, use the offset model's correction function to correct time,
                                    otherwise use best offset (model's intercept value).  default True
        """
        # sample interval in microseconds, scaled by the model's slope when the model function is used
        slope = dtu.seconds_to_microseconds(self.sample_interval_s) * (1 + offset_model.slope) \
            if use_model_function else dtu.seconds_to_microseconds(self.sample_interval_s)
        if self.type == SensorType.AUDIO:
            # use the model to update the first timestamp or add the best offset (model's intercept value),
            # then rebuild the timestamps from that first timestamp, the number of samples and the slope
            self.data_df["timestamps"] = \
                calc_evenly_sampled_timestamps(offset_model.update_time(self.first_data_timestamp(),
                                                                        use_model_function),
                                               self.num_samples(),
                                               slope)
        else:
            self.data_df["timestamps"] = offset_model.update_timestamps(self.data_timestamps(), use_model_function)
        time_diffs = np.floor(np.diff(self.data_timestamps()))
        # the stats are only refreshed when there is more than one interval between samples
        if len(time_diffs) > 1:
            self.sample_interval_s = dtu.microseconds_to_seconds(slope)
            if self.sample_interval_s > 0:
                self.sample_rate_hz = 1 / self.sample_interval_s
                self.sample_interval_std_s = dtu.microseconds_to_seconds(np.std(time_diffs))
        self.timestamps_altered = True

    def sort_by_data_timestamps(self, ascending: bool = True):
        """
        sorts the data based on timestamps
        :param ascending: if True, timestamps are sorted in ascending order
        """
        self.data_df = self.data_df.sort_values("timestamps", ascending=ascending)

    def interpolate(self, interpolate_timestamp: float, first_point: int, second_point: int = 0,
                    copy: bool = True) -> pd.Series:
        """
        interpolates two points at the intercept value.  the two points must be consecutive in the dataframe
        numeric columns are linearly interpolated between the two points; non-numeric columns take the values
        of the point closest in time; columns that cannot be interpolated are left out of the result
        :param interpolate_timestamp: timestamp to interpolate other values
        :param first_point: index of first point
        :param second_point: delta to second point, default 0 (same as first point)
        :param copy: if True, copies the values of the first point, default True
        :return: pd.Series of interpolated points
        """
        numeric_columns = [col for col in self.data_df.columns
                           if col not in NON_INTERPOLATED_COLUMNS + NON_NUMERIC_COLUMNS]
        non_numeric_columns = [col for col in self.data_df.columns if col in NON_NUMERIC_COLUMNS]
        start_point = self.data_df.iloc[first_point]
        numeric_start = start_point[numeric_columns]
        non_numeric_start = start_point[non_numeric_columns]
        if not copy and second_point:
            end_point = self.data_df.iloc[first_point + second_point]
            numeric_end = end_point[numeric_columns]
            non_numeric_end = end_point[non_numeric_columns]
            # non-numeric values come from whichever point is closest in time; ties go to the first point
            first_closer = \
                np.abs(start_point["timestamps"] - interpolate_timestamp) \
                <= np.abs(end_point["timestamps"] - interpolate_timestamp)
            non_numeric_diff = non_numeric_start if first_closer else non_numeric_end
            time_delta = end_point["timestamps"] - start_point["timestamps"]
            if time_delta == 0:
                # both points share a timestamp, so there is no slope to follow; use the first point
                numeric_diff = numeric_start.copy()
            else:
                # value = start + rate of change * time elapsed since the start point
                rate_of_change = (numeric_end - numeric_start) / time_delta
                numeric_diff = \
                    rate_of_change * (interpolate_timestamp - start_point["timestamps"]) + numeric_start
        else:
            # work on a copy so the assignment below never touches the source row
            numeric_diff = numeric_start.copy()
            non_numeric_diff = non_numeric_start
        numeric_diff["timestamps"] = interpolate_timestamp
        return pd.concat([numeric_diff, non_numeric_diff])

Methods

def append_data(self, new_data: pandas.core.frame.DataFrame, recalculate_stats: bool = False) ‑> SensorData

append the new data to the dataframe, update the sensor's stats on demand if it doesn't have a fixed sample rate, then return the updated SensorData object :param new_data: Dataframe containing data to add to the sensor's dataframe :param recalculate_stats: if True and the sensor does not have a fixed sample rate, sort the timestamps, recalculate the sample rate, interval, and interval std dev, default False :return: the updated SensorData object

Expand source code
def append_data(
        self, new_data: pd.DataFrame, recalculate_stats: bool = False
) -> "SensorData":
    """
    append the new data to the end of the dataframe, update the sensor's stats on demand if it doesn't have a
        fixed sample rate, then return the updated SensorData object.
        The combined dataframe is re-indexed from 0 (the indices of both inputs are discarded)
    :param new_data: Dataframe containing data to add to the sensor's dataframe
    :param recalculate_stats: if True and the sensor does not have a fixed sample rate, sort the timestamps,
                                recalculate the sample rate, interval, and interval std dev, default False
    :return: the updated SensorData object
    """
    # DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
    # pd.concat with ignore_index=True produces the same combined frame on every pandas version
    self.data_df = pd.concat([self.data_df, new_data], ignore_index=True)
    if recalculate_stats and not self.is_sample_rate_fixed:
        self.organize_and_update_stats()
    return self
def data_channels(self) ‑> List[str]

:return: a list of the names of the columns (data channels) of the dataframe

Expand source code
def data_channels(self) -> List[str]:
    """
    lists the data channels of the sensor
    :return: the column names of the dataframe, in dataframe order, as a list
    """
    column_index = self.data_df.columns
    return column_index.to_list()
def data_timestamps(self) ‑> numpy.ndarray

:return: the timestamps as a numpy array

Expand source code
def data_timestamps(self) -> np.array:
    """
    reads the "timestamps" column of the dataframe
    :return: the timestamps as a numpy array of floats
    """
    timestamp_column = self.data_df["timestamps"]
    return timestamp_column.to_numpy(dtype=float)
def first_data_timestamp(self) ‑> float

:return: timestamp of the first data point

Expand source code
def first_data_timestamp(self) -> float:
    """
    reads the first row of the "timestamps" column (by position, not by index label)
    :return: timestamp of the first data point
    """
    timestamp_column = self.data_df["timestamps"]
    return timestamp_column.iloc[0]
def get_data_channel(self, channel_name: str) ‑> Union[numpy.ndarray, List[str]]

gets the data channel specified, raises an error and lists valid fields if channel_name is not in the dataframe :param channel_name: the name of the channel to get data for :return: the data values of the channel as a numpy array or list of strings for enumerated channels

Expand source code
def get_data_channel(self, channel_name: str) -> Union[np.array, List[str]]:
    """
    fetches one data channel (column) of the dataframe by name
    :param channel_name: the name of the channel to get data for
    :return: for the enumerated channels (location_provider, image_codec, audio_codec, network_type,
                power_state, cell_service) a list of the enum member names; for any other channel the
                column values as a numpy array
    :raises ValueError: if channel_name is not a column of the dataframe; the message lists the valid channels
    """
    if channel_name not in self.data_df.columns:
        raise ValueError(
            f"WARNING: {channel_name} does not exist; try one of {self.data_channels()}"
        )
    channel_values = self.data_df[channel_name]
    # enumerated channels store the enum's value; pick the enum used to translate values into names
    if channel_name == "location_provider":
        enum_type = LocationProvider
    elif channel_name == "image_codec":
        enum_type = ImageCodec
    elif channel_name == "audio_codec":
        enum_type = AudioCodec
    elif channel_name == "network_type":
        enum_type = NetworkType
    elif channel_name == "power_state":
        enum_type = PowerState
    elif channel_name == "cell_service":
        enum_type = CellServiceState
    else:
        # every other channel is returned as-is
        return channel_values.to_numpy()
    return [enum_type(code).name for code in channel_values]
def get_valid_data_channel_values(self, channel_name: str) ‑> numpy.ndarray

gets all non-nan values from the channel specified :param channel_name: the name of the channel to get data for :return: non-nan values of the channel as a numpy array

Expand source code
def get_valid_data_channel_values(self, channel_name: str) -> np.array:
    """
    gets all non-nan values from the channel specified
    works for numeric channels as well as channels that get_data_channel returns as a list of strings
    (enumerated channels) or as non-numeric arrays
    :param channel_name: the name of the channel to get data for
    :return: non-nan values of the channel as a numpy array
    """
    # get_data_channel returns a plain list for enumerated channels; normalize to an array so the
    # boolean mask below can be applied
    channel_data = np.asarray(self.get_data_channel(channel_name))
    # pd.isna handles string/object arrays, where np.isnan would raise a TypeError
    return channel_data[~pd.isna(channel_data)]
def interpolate(self, interpolate_timestamp: float, first_point: int, second_point: int = 0, copy: bool = True) ‑> pandas.core.series.Series

interpolates two points at the intercept value. the two points must be consecutive in the dataframe :param interpolate_timestamp: timestamp to interpolate other values :param first_point: index of first point :param second_point: delta to second point, default 0 (same as first point) :param copy: if True, copies the values of the first point, default True :return: pd.Series of interpolated points

Expand source code
def interpolate(self, interpolate_timestamp: float, first_point: int, second_point: int = 0,
                copy: bool = True) -> pd.Series:
    """
    creates a data point at interpolate_timestamp from one or two existing points of the dataframe.
    if copy is True or second_point is 0, the values of the first point are copied.  otherwise the numeric
    columns are linearly interpolated between the two points, and the columns listed in NON_NUMERIC_COLUMNS
    are taken from whichever point is closer in time (the first point wins ties).
    columns listed in NON_INTERPOLATED_COLUMNS are not part of the result
    :param interpolate_timestamp: timestamp to interpolate other values
    :param first_point: index of first point
    :param second_point: delta to second point, default 0 (same as first point)
    :param copy: if True, copies the values of the first point, default True
    :return: pd.Series of interpolated points, with "timestamps" set to interpolate_timestamp
    """
    all_columns = self.data_df.columns
    numeric_columns = [col for col in all_columns
                       if col not in NON_INTERPOLATED_COLUMNS + NON_NUMERIC_COLUMNS]
    non_numeric_columns = [col for col in all_columns if col in NON_NUMERIC_COLUMNS]

    start_point = self.data_df.iloc[first_point]
    numeric_start = start_point[numeric_columns]
    non_numeric_start = start_point[non_numeric_columns]
    if not copy and second_point:
        end_point = self.data_df.iloc[first_point + second_point]
        start_time = start_point["timestamps"]
        end_time = end_point["timestamps"]
        # non-numeric values cannot be blended; use the point nearest to the requested timestamp
        first_closer = \
            np.abs(start_time - interpolate_timestamp) <= np.abs(end_time - interpolate_timestamp)
        non_numeric_diff = non_numeric_start if first_closer else end_point[non_numeric_columns]
        time_span = end_time - start_time
        if time_span == 0:
            # both points share a timestamp, so there is no slope to follow; keep the first point's values
            numeric_diff = numeric_start
        else:
            # value = start + (end - start) / (t_end - t_start) * (t - t_start)
            # the elapsed time must be measured from the start timestamp for every column,
            # not from each column's own start value
            slopes = (end_point[numeric_columns] - numeric_start) / time_span
            numeric_diff = slopes * (interpolate_timestamp - start_time) + numeric_start
    else:
        numeric_diff = numeric_start
        non_numeric_diff = non_numeric_start
    numeric_diff["timestamps"] = interpolate_timestamp
    return pd.concat([numeric_diff, non_numeric_diff])
def is_sample_interval_invalid(self) ‑> bool

:return: True if sample interval is np.nan or equal to 0.0

Expand source code
def is_sample_interval_invalid(self) -> bool:
    """
    checks whether the sample interval can be used to compute a sample rate
    :return: True if sample interval is np.nan or equal to 0.0
    """
    interval_s = self.sample_interval_s
    return np.isnan(interval_s) or interval_s == 0.0
def last_data_timestamp(self) ‑> float

:return: timestamp of the last data point

Expand source code
def last_data_timestamp(self) -> float:
    """
    reads the last row of the "timestamps" column (by position, not by index label)
    :return: timestamp of the last data point
    """
    timestamp_column = self.data_df["timestamps"]
    return timestamp_column.iloc[-1]
def num_samples(self) ‑> int

:return: the number of rows (samples) in the dataframe

Expand source code
def num_samples(self) -> int:
    """
    counts the data points held by the sensor
    :return: the number of rows (samples) in the dataframe
    """
    row_count, _ = self.data_df.shape
    return row_count
def organize_and_update_stats(self) ‑> SensorData

sorts the data by timestamps, then if the sample rate is not fixed, recalculates the sample rate, interval, and interval std dev. If there is only one value, sets the sample rate, interval, and interval std dev to np.nan. Updates the SensorData object with the new values :return: updated version of self

Expand source code
def organize_and_update_stats(self) -> "SensorData":
    """
    sorts the data by timestamps, then, only if the sample rate is not fixed, recomputes the sample interval
        (mean of the timestamp differences), the interval std dev and the sample rate.
        With one sample or fewer, all three values are set to np.nan.
        The sample rate is np.nan when the computed interval is invalid (nan or 0)
    :return: updated version of self
    """
    self.sort_by_data_timestamps()
    if self.is_sample_rate_fixed:
        # fixed-rate sensors keep their existing stats; sorting is all that is needed
        return self

    if self.num_samples() <= 1:
        # no timestamp differences can be computed
        self.sample_interval_s = np.nan
        self.sample_interval_std_s = np.nan
        self.sample_rate_hz = np.nan
        return self

    # differences are in microseconds; the stored stats are in seconds
    intervals_us = np.diff(self.data_timestamps())
    mean_interval_us = float(np.mean(intervals_us))
    self.sample_interval_s = dtu.microseconds_to_seconds(mean_interval_us)
    std_interval_us = float(np.std(intervals_us))
    self.sample_interval_std_s = dtu.microseconds_to_seconds(std_interval_us)
    if self.is_sample_interval_invalid():
        self.sample_rate_hz = np.nan
    else:
        self.sample_rate_hz = 1 / self.sample_interval_s
    return self
def print_errors(self)

prints errors to screen

Expand source code
def print_errors(self):
    """
    prints the sensor's errors to screen by delegating to the errors object's print()
    """
    error_log = self.errors
    error_log.print()
def samples(self) ‑> numpy.ndarray

gets the samples of dataframe :return: the data values of the dataframe as a numpy ndarray

Expand source code
def samples(self) -> np.ndarray:
    """
    gets the samples of dataframe, skipping the first two columns
    :return: the values of the third and later columns as a numpy ndarray, transposed so that each row of
                the result holds the values of one column
    """
    value_columns = self.data_df.iloc[:, 2:]
    return value_columns.T.to_numpy()
def sensor_type_as_str(self) ‑> str

gets the sensor type as a string :return: sensor type of the sensor as a string

Expand source code
def sensor_type_as_str(self) -> str:
    """
    gets the sensor type as a string
    :return: the name of the sensor's type
    """
    sensor_kind = self.type
    return sensor_kind.name
def sort_by_data_timestamps(self, ascending: bool = True)

sorts the data based on timestamps :param ascending: if True, timestamps are sorted in ascending order

Expand source code
def sort_by_data_timestamps(self, ascending: bool = True):
    """
    replaces the dataframe with a copy sorted on the "timestamps" column
    :param ascending: if True, timestamps are sorted in ascending order, otherwise descending.  default True
    """
    sorted_frame = self.data_df.sort_values(by="timestamps", ascending=ascending)
    self.data_df = sorted_frame
def unaltered_data_timestamps(self) ‑> numpy.ndarray

:return: the unaltered timestamps as a numpy array

Expand source code
def unaltered_data_timestamps(self) -> np.array:
    """
    reads the "unaltered_timestamps" column of the dataframe
    :return: the unaltered timestamps as a numpy array of floats
    """
    unaltered_column = self.data_df["unaltered_timestamps"]
    return unaltered_column.to_numpy(dtype=float)
def update_data_timestamps(self, offset_model: OffsetModel, use_model_function: bool = True)

updates the timestamps of the data points :param offset_model: model used to update the timestamps :param use_model_function: if True, use the offset model's correction function to correct time, otherwise use best offset (model's intercept value). default True

Expand source code
def update_data_timestamps(self, offset_model: om.OffsetModel, use_model_function: bool = True):
    """
    updates the timestamps of the data points, then marks the timestamps as altered.
    audio timestamps are regenerated as evenly sampled values starting from the corrected first timestamp;
    timestamps of all other sensor types are corrected individually by the offset model.
    when more than two data points exist, the sample interval is replaced by the (possibly slope-adjusted)
    interval, and if that interval is positive the sample rate and interval std dev are updated as well
    :param offset_model: model used to update the timestamps
    :param use_model_function: if True, use the offset model's correction function to correct time,
                                otherwise use best offset (model's intercept value).  default True
    """
    # sample interval in microseconds, stretched by the model's slope when the model function is used
    interval_us = dtu.seconds_to_microseconds(self.sample_interval_s)
    if use_model_function:
        slope = interval_us * (1 + offset_model.slope)
    else:
        slope = interval_us

    if self.type == SensorType.AUDIO:
        # use the model to update the first timestamp or add the best offset (model's intercept value)
        corrected_start = offset_model.update_time(self.first_data_timestamp(), use_model_function)
        new_timestamps = calc_evenly_sampled_timestamps(corrected_start, self.num_samples(), slope)
    else:
        new_timestamps = offset_model.update_timestamps(self.data_timestamps(), use_model_function)
    self.data_df["timestamps"] = new_timestamps

    time_diffs = np.floor(np.diff(self.data_timestamps()))
    if len(time_diffs) > 1:
        self.sample_interval_s = dtu.microseconds_to_seconds(slope)
        if self.sample_interval_s > 0:
            self.sample_rate_hz = 1 / self.sample_interval_s
            self.sample_interval_std_s = dtu.microseconds_to_seconds(np.std(time_diffs))
    self.timestamps_altered = True
class SensorType (value, names=None, *, module=None, qualname=None, type=None, start=1)

Enumeration of possible types of sensors to read data from

Expand source code
class SensorType(enum.Enum):
    """
    Enumeration of possible types of sensors to read data from
    """

    UNKNOWN_SENSOR = 0  # unknown sensor
    ACCELEROMETER = 1  # meters/second^2
    AMBIENT_TEMPERATURE = 2  # degrees Celsius
    AUDIO = 3  # normalized counts
    COMPRESSED_AUDIO = 4  # bytes (codec specific)
    GRAVITY = 5  # meters/second^2
    GYROSCOPE = 6  # radians/second
    IMAGE = 7  # bytes (codec specific)
    LIGHT = 8  # lux
    LINEAR_ACCELERATION = 9  # meters/second^2
    LOCATION = 10  # See standard
    MAGNETOMETER = 11  # microtesla
    ORIENTATION = 12  # radians
    PRESSURE = 13  # kilopascal
    PROXIMITY = 14  # on, off, cm
    RELATIVE_HUMIDITY = 15  # percentage
    ROTATION_VECTOR = 16  # Unitless
    INFRARED = 17  # this is proximity
    STATION_HEALTH = 18
    # battery charge and current level, phone internal temperature, network source and strength,
    # available RAM of the system, cell service status, amount of hard disk space left, power charging state
    BEST_LOCATION = 19  # See standard

    @staticmethod
    def type_from_str(type_str: str) -> "SensorType":
        """
        converts a string to a sensor type.  matching is case-insensitive and accepts the member names
        plus a few short aliases (e.g. "mic", "accel", "gyro", "loc", "mag", "bar").
        "infrared" resolves to PROXIMITY
        :param type_str: string to convert
        :return: a sensor type, UNKNOWN_SENSOR is the default for invalid inputs
        """
        # a single lookup table replaces the long if/elif chain and lower-cases the input only once
        names_to_types = {
            "audio": SensorType.AUDIO,
            "mic": SensorType.AUDIO,
            "microphone": SensorType.AUDIO,
            "accelerometer": SensorType.ACCELEROMETER,
            "accel": SensorType.ACCELEROMETER,
            "ambient_temperature": SensorType.AMBIENT_TEMPERATURE,
            "compressed_audio": SensorType.COMPRESSED_AUDIO,
            "gravity": SensorType.GRAVITY,
            "gyroscope": SensorType.GYROSCOPE,
            "gyro": SensorType.GYROSCOPE,
            "image": SensorType.IMAGE,
            "light": SensorType.LIGHT,
            "linear_acceleration": SensorType.LINEAR_ACCELERATION,
            "linear_accel": SensorType.LINEAR_ACCELERATION,
            "location": SensorType.LOCATION,
            "loc": SensorType.LOCATION,
            "best_location": SensorType.BEST_LOCATION,
            "best_loc": SensorType.BEST_LOCATION,
            "magnetometer": SensorType.MAGNETOMETER,
            "mag": SensorType.MAGNETOMETER,
            "orientation": SensorType.ORIENTATION,
            "pressure": SensorType.PRESSURE,
            "bar": SensorType.PRESSURE,
            "barometer": SensorType.PRESSURE,
            "proximity": SensorType.PROXIMITY,
            "infrared": SensorType.PROXIMITY,
            "relative_humidity": SensorType.RELATIVE_HUMIDITY,
            "rotation_vector": SensorType.ROTATION_VECTOR,
            # previously missing, which made the STATION_HEALTH member name resolve to UNKNOWN_SENSOR
            "station_health": SensorType.STATION_HEALTH,
        }
        return names_to_types.get(type_str.lower(), SensorType.UNKNOWN_SENSOR)

Ancestors

  • enum.Enum

Class variables

var ACCELEROMETER
var AMBIENT_TEMPERATURE
var AUDIO
var BEST_LOCATION
var COMPRESSED_AUDIO
var GRAVITY
var GYROSCOPE
var IMAGE
var INFRARED
var LIGHT
var LINEAR_ACCELERATION
var LOCATION
var MAGNETOMETER
var ORIENTATION
var PRESSURE
var PROXIMITY
var RELATIVE_HUMIDITY
var ROTATION_VECTOR
var STATION_HEALTH
var UNKNOWN_SENSOR

Static methods

def type_from_str(type_str: str) ‑> SensorType

converts a string to a sensor type :param type_str: string to convert :return: a sensor type, UNKNOWN_SENSOR is the default for invalid inputs

Expand source code
@staticmethod
def type_from_str(type_str: str) -> "SensorType":
    """
    converts a string to a sensor type.  matching is case-insensitive and accepts the member names
    plus a few short aliases (e.g. "mic", "accel", "gyro", "loc", "mag", "bar").
    "infrared" resolves to PROXIMITY
    :param type_str: string to convert
    :return: a sensor type, UNKNOWN_SENSOR is the default for invalid inputs
    """
    # a single lookup table replaces the long if/elif chain and lower-cases the input only once
    names_to_types = {
        "audio": SensorType.AUDIO,
        "mic": SensorType.AUDIO,
        "microphone": SensorType.AUDIO,
        "accelerometer": SensorType.ACCELEROMETER,
        "accel": SensorType.ACCELEROMETER,
        "ambient_temperature": SensorType.AMBIENT_TEMPERATURE,
        "compressed_audio": SensorType.COMPRESSED_AUDIO,
        "gravity": SensorType.GRAVITY,
        "gyroscope": SensorType.GYROSCOPE,
        "gyro": SensorType.GYROSCOPE,
        "image": SensorType.IMAGE,
        "light": SensorType.LIGHT,
        "linear_acceleration": SensorType.LINEAR_ACCELERATION,
        "linear_accel": SensorType.LINEAR_ACCELERATION,
        "location": SensorType.LOCATION,
        "loc": SensorType.LOCATION,
        "best_location": SensorType.BEST_LOCATION,
        "best_loc": SensorType.BEST_LOCATION,
        "magnetometer": SensorType.MAGNETOMETER,
        "mag": SensorType.MAGNETOMETER,
        "orientation": SensorType.ORIENTATION,
        "pressure": SensorType.PRESSURE,
        "bar": SensorType.PRESSURE,
        "barometer": SensorType.PRESSURE,
        "proximity": SensorType.PROXIMITY,
        "infrared": SensorType.PROXIMITY,
        "relative_humidity": SensorType.RELATIVE_HUMIDITY,
        "rotation_vector": SensorType.ROTATION_VECTOR,
        # previously missing, which made the STATION_HEALTH member name resolve to UNKNOWN_SENSOR
        "station_health": SensorType.STATION_HEALTH,
    }
    return names_to_types.get(type_str.lower(), SensorType.UNKNOWN_SENSOR)