Source code for curepy.container.measurement

"""Container for Measurement data"""

import numpy as np
from typing import Optional, Union
import comet_maths as cm
import curepy.utilities.utilities as util



[docs]
class Measurement:
    def __init__(
        self,
        y: np.ndarray,
        u_y_total: Optional[np.ndarray] = None,
        u_y_rand: Optional[np.ndarray] = None,
        u_y_syst: Optional[np.ndarray] = None,
        corr_y: Optional[Union[str, np.ndarray]] = None,
        skip_invcov: bool = False,
    ) -> None:
        """
        Container class for measurement variable data.

        :param y: Measurement variable.
        :param u_y_total: Total uncertainty of measurement variable; must have the same
            shape as ``y``.
        :param u_y_rand: Random uncertainty of measurement variable; must have the same
            shape as ``y``.
        :param u_y_syst: Systematic uncertainty of measurement variable; must have the same
            shape as ``y``.
        :param corr_y: Error-correlation of the measurement variable.
            Accepted values: ``None``, ``"rand"`` (random), ``"syst"``
            (systematic), or a square matrix whose side length equals the
            length of ``y``.
        :param skip_invcov: If ``True``, skip the computation of the inverse covariance matrix (which is only needed for certain retrieval methods like optimal estimation).
        """

        u_y_total, corr_y = self._format_uncertainty(
            u_y_total, u_y_rand, u_y_syst, corr_y
        )

        self.y = y
        self.u_y = u_y_total
        self.y_flat, self.u_y_flat, self.y_shape = self._flatten_inputs(
            self.y, self.u_y
        )

        self.corr_y = util.format_correlation(self.y_flat, corr_y)

        self.corr_y, self.cholesky, self.W = self.return_corr_cholesky_whitening(
            self.corr_y
        )

        self._check_shapes(self.y_flat, self.u_y_flat, self.corr_y)

        if corr_y is not None and not skip_invcov:
            self.invcov = self.calculate_inv_cov(self.u_y_flat, self.corr_y)
        else:
            self.invcov = None

    @staticmethod
    def _flatten_inputs(
        y: np.ndarray,
        u_y: Optional[np.ndarray],
    ) -> tuple:
        """
        Flatten the measurement variable and its uncertainties.

        :param y: Measurement variable array.
        :param u_y: Uncertainty array for the measurement variable, or
            ``None`` if uncertainties are not provided.
        :returns: Tuple of ``(y_flat, u_y_flat, y_shape)`` where
            ``u_y_flat`` is ``None`` when ``u_y`` is ``None``.
        """
        y_flat, y_shape = util.flatten_array(y)
        if u_y is not None:
            u_y_flat, u_y_shape = util.flatten_array(u_y)
            if y_shape != u_y_shape:
                raise ValueError(
                    "Measurement variable, y, and related uncertainties, u_y, have different shapes:",
                    y_shape,
                    u_y_shape,
                )
        else:
            u_y_flat = None

        return y_flat, u_y_flat, y_shape

    @staticmethod
    def _check_shapes(
        y: np.ndarray,
        u_y: Optional[np.ndarray],
        corr_y: Optional[np.ndarray],
    ) -> None:
        """
        Check that the shapes of ``y``, ``u_y``, and ``corr_y`` are
        mutually compatible.

        :param y: Flattened measurement variable.
        :param u_y: Flattened uncertainty array, or ``None``.
        :param corr_y: Error-correlation matrix, or ``None``.
        """
        N = len(y)

        if u_y is not None and len(u_y) != N:
            raise ValueError(
                "Length of measured variable y must match length of uncertainty variable"
            )

        if u_y is None and corr_y is not None:
            raise ValueError(
                "Uncertainties must be defined if error correlation matrix is defined"
            )

        if corr_y is not None and corr_y.shape[0] != corr_y.shape[1]:
            raise ValueError("Error correlation matrix must be a square matrix")

        if corr_y is not None and corr_y.shape[0] != N:
            raise ValueError(
                "Length of measured variable y must match side length of error correlation matrix"
            )

    @staticmethod
    def _format_uncertainty(u_total, u_rand, u_syst, corr):

        if u_total is None and u_rand is None and u_syst is None:
            return None, None
        elif u_total is not None and u_rand is None and u_syst is None:
            return u_total, corr
        elif u_total is not None and (u_rand is not None or u_syst is not None):
            raise ValueError("If u_y_total is set, u_y_rand and u_y_syst must be None")
        elif u_total is None and u_rand is not None and u_syst is None:
            return u_rand, "rand"
        elif u_total is None and u_rand is None and u_syst is not None:
            return u_syst, "syst"
        elif u_total is None and u_rand is not None and u_syst is not None:
            tot = np.sqrt(u_rand**2 + u_syst**2)
            tot_cov = cm.convert_corr_to_cov(
                np.eye(len(u_rand.flatten())), u_rand.flatten()
            ) + cm.convert_corr_to_cov(
                np.ones((len(u_syst.flatten()), len(u_syst.flatten()))),
                u_syst.flatten(),
            )
            tot_corr = cm.convert_cov_to_corr(tot_cov, tot)
            return tot, tot_corr


[docs]
    @staticmethod
    def return_corr_cholesky_whitening(corr: Optional[np.ndarray]) -> tuple:
        """
        Return the correlation matrix, its Cholesky decomposition, and the whitening matrix.

        :param corr: Correlation matrix, or ``None``.
        :returns: Tuple of ``(corr, cholesky, W)`` where ``cholesky`` is
            the Cholesky decomposition of the correlation matrix, or
            ``None`` if ``corr`` is ``None``, and ``W`` is the whitening matrix.
        """
        if corr is not None:
            try:
                cholesky = np.linalg.cholesky(corr)
                W = np.linalg.solve(cholesky, np.eye(cholesky.shape[0]))
                return corr, cholesky, W
            except np.linalg.LinAlgError:
                # If the correlation matrix is not positive definite, use the nearest positive definite matrix
                corr_pd = cm.nearestPD_cholesky(corr, return_cholesky=False, corr=True)
                cholesky = np.linalg.cholesky(corr_pd)
                W = np.linalg.solve(cholesky, np.eye(cholesky.shape[0]))
                return corr_pd, cholesky, W
        else:
            return None, None, None



[docs]
    @staticmethod
    def calculate_inv_cov(unc: np.ndarray, corr: np.ndarray) -> np.ndarray:
        """
        Calculate the inverse covariance matrix.

        :param unc: Uncertainty (standard deviation) array.
        :param corr: Correlation matrix.
        :returns: Inverse of the covariance matrix.
        """

        cov = cm.convert_corr_to_cov(corr, unc)

        if np.array_equal(cov, np.diag(np.diag(cov))):
            return np.diag(1 / np.diag(cov))
        else:
            return np.linalg.inv(cov)