Source code for aggregate.extensions.tweedie

from enum import Enum
try:
    from jax import grad     # EXTREMELY idle...
    import jax.numpy as jnp
except:
    pass
import logging
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import scipy.stats as ss
from typing import Optional, Tuple  # , Union, Any  # Import necessary types
from pathlib import Path

from IPython.display import display
# from collections import namedtuple
# from numpy import ndarray, dtype

from ..distributions import Aggregate  # noqa
from ..utilities import MomentWrangler
from ..underwriter import build
from .ft import FourierTools

# logging
logger = logging.getLogger(__name__)



[docs]
class Mode(Enum):
    """Mode of the Tweedie class."""
    REPRODUCTIVE = 1
    ADDITIVE = 2




[docs]
class Tweedie(object):
    """
    ``Tweedie`` is a class for working with the Tweedie class of
    exponential dispersion models, those with variance function

        V(μ) = dispersion x μ^p,

    where p is the power parameter and μ the mean. The Tweedie class
    of distributions includes:

    * 1 < p < 2: Tweedie distributions, a Poisson-gamma compound distributions
    * p = 1: Poisson distributions
    * p = 0: normal distributions
    * p < 0: spectrally positive stable distributions with index 1 < alpha < 2
    * p = infinity: spectrally positive Cauchy distribution with index alpha = 1
    * 2 < p < infinity: positive spectrally positive stable distributions with
      index 0 < alpha < 1
    * p = 3 (special case): inverse Gaussian and Levy(1/2) distributions
    * p = 2: gamma distributions

    The range 0 < p < 1 is impossible and does not correspond to an exponential
    dispersion model.

    The ``Tweedie`` class provides

    * Translation between the reproductive and additive parameterizations. The
      former is used to model averages like pure premiums that combined in weighted
      sums and the latter models totals (sums) that are simply additive.

    * Create frozen ``scipy.stats``-like distributions, either explicitly
      as a ``scipy.stats`` object from the normal, levy_stable, levy, inverse
      Gaussian, or gamma distribution, or as an ``Aggregate`` object otherwise.
    * For Tweedie distributions it translates parameters into Poisson frequency and
      gamma severity, the latter as mean and CV or shape and rate.
    * Create a ``aggregate.extensions.ft.FourierTools`` object for Fourier analysis
      that provides an approximation to the distribution for those without a
      closed form expression (extreme and positive extreme stable, titled Cauchy,
      Tweedie).
    * The generating distribution ``c``.
    * The cumulant function, ``kappa``.
    * The tilted cumulant function, ``K``
    * The characteristic function, ``chf``.
    * The Levy measure density, ``levy_density``, see [3]

    Methodology
    -----------

    The class is initialized with either the reproductive parameters (p, mean, dispersion)
    or the additive parameters (p, theta, index). The class then computes the missing
    parameters. The class creates a frozen distribution object on demand for the
    additive or reproductive distribution per those provided at initialization. Methods
    are available to create the dual. Methods are also available to translate between
    additive and reproductive parameter for the same distribution (not the dual).

    Notation
    =========

    The canonical parameter is always called theta, p is always p. The other parameters
    are named (mean, dispersion, index) or (frequency, shape, rate) or (sev_m, sev_cv).
    The dispersion is usually sigma^2. The index is lambda or phi (but the frequency is
    sometimes lambda).

    Notation follows Jorgensen [1] and [2]. My blog post on Tweedie uses -alpha for the
    shape.

    References
    =============
    [1] Jørgensen, Bent. "Exponential dispersion models." Journal of the Royal Statistical Society
        Series B: Statistical Methodology 49.2 (1987): 127-145.
    [2] Jorgensen, B. (1997). The theory of dispersion models. Chapman and Hall/CRC.
    [3] Jørgensen, Bent, and José Raúl Martínez. "THE LÉVY—KHINCHINE REPRESENTATION OF THE TWEEDIE
        CLASS." Brazilian Journal of Probability and Statistics (1996): 225-233.
    """

    _renamer = {'mean': '$\\mu$', 'p': '$p$', 'dispersion': '$\\phi$', 'theta': '$\\theta$', 'index': '$\\phi$',
                'frequency': '$\\lambda$', 'shape': '$\\alpha$', 'rate': '$\\beta$', 'sev_m': '$\\mu_X$',
                'sev_cv': '$\\nu_X$', 'cv': '$\\nu$', 'p0': '$\\Pr(X=0)$'}

    def __init__(self, p: float, *, mean: Optional[float] = None, dispersion: Optional[float] = 1,
                 theta: Optional[float] = None, index: Optional[float] = 1):
        """
        Initialize object from either reproductive p, mean (mu), dispersion (sigma^2) or additive
        p, theta (canonical), index (phi or lambda) parameters. All parameters except p, which is
        required must be given by name. See also the static method ``Tweedie.from_po_gamma`` to
        create a Tweedie distribution (1 < p < 2) from the claim count and severity parameters.

        Notice that alpha - 1 = -1 / (p - 1) (p. 131 after eq 4.10).

        See Also
        ---------

        * ``Tweedie.dual``: create the dual object.
        * ``Tweedie.from_p_mean_cv``: create a Tweedie object from p, mean, and cv.
        * ``Tweedie.from_po_gamma``: create a Tweedie object from the Poisson-gamma parameters.
        * ``Tweedie.from_p_mean_cv``: create a Tweedie object from p, mean, and cv.


        """
        if 0 < p < 1:
            raise ValueError(f'p cannot be in (0, 1), got {p}')

        alpha = self.p_to_alpha(p)

        if mean is not None and dispersion is not None:
            # reproductive
            self.mode = Mode.REPRODUCTIVE
            if dispersion <= 0:
                raise ValueError('Dispersion must be strictly positive')
            # _, theta, index = self.reproductive_to_additive(p, mean, dispersion)
            index = 1. / dispersion
            # Jorg [2], p 131 equation after 4.12
            if p == 1:
                theta = np.log(mean)
            else:
                # using (alpha - 1)(p - 1) = -1
                theta = (alpha - 1) * mean ** (1 - p)
        elif theta is not None and index is not None:
            # additive
            self.mode = Mode.ADDITIVE
            if index <= 0:
                raise ValueError('Index must be strictly positive')
            # _, mean, dispersion = self.additive_to_reproductive(p, theta, index)
            dispersion = 1. / index
            # Jorg [2], p 131 equation after 4.12
            if p == 1:
                mean = np.exp(theta)
            else:
                mean = ((1 - p) * theta) ** (alpha - 1)

        prefix = ('' if theta == 0 else 'tilted ')
        match p:
            case 0:
                self.name = "Gaussian"
            case 1:
                self.name = "Poisson"
            case 2:
                self.name = "gamma"
            case 3:
                if theta == 0:
                    self.name = "Lévy"
                else:
                    self.name = "inverse Gaussian"
            case p if 1 < p < 2:
                self.name = "Tweedie"
            case p if p < 0:
                self.name = prefix + "extreme stable"
            case p if p > 2:
                self.name = prefix + "positive extreme stable"
            case p if np.isinf(p):
                self.name = prefix + "Landau"
            case _:
                raise ValueError(f'Invalid p: {p}')

        # set member variables
        self.p = p
        self.alpha = alpha
        self.mean = mean
        self.dispersion = dispersion
        self.theta = theta
        self.index = index
        # variables prefixed tw_ refer to the created Tweedie distribution
        self.tw_variance = dispersion * mean ** p if self.mode == Mode.REPRODUCTIVE else index * mean ** p
        self.tw_sd = np.sqrt(self.tw_variance)
        self.tw_cv = self.tw_sd / mean
        self._fz = None  # for the frozen distribution object
        self._ft = None


[docs]
    def dual(self):
        """Create the dual object."""
        match self.mode:
            case Mode.REPRODUCTIVE:
                return Tweedie(self.p, theta=self.theta, index=self.index)
            case Mode.ADDITIVE:
                return Tweedie(self.p, mean=self.mean, dispersion=self.dispersion)



[docs]
    @staticmethod
    def from_p_mean_cv(p: float, mean: float, cv: float) -> "Tweedie":
        """
        Create a Tweedie object from the parameters p, mean, and the output
        Tweedie cv. The dispersion is computed by equating two expressions
        for the variance:

              (cv * mean) ** 2 = dispersion * mean^p.
          ==>       dispersion = cv **2 * mean ** (2 - p)

        The object is created using p, mean, and dispersion in reproductive
        mode.
        """
        dispersion = cv ** 2 * mean ** (2 - p)
        return Tweedie(p=p, mean=mean, dispersion=dispersion)



[docs]
    @staticmethod
    def from_po_gamma(frequency: float, *, sev_m: Optional[float] = None, sev_cv: Optional[float] = None,
                      sev_shape: Optional[float] = None, sev_rate: Optional[float] = None,
                      tw_cv: Optional[float] = None) -> "Tweedie":
        """
        Create a Tweedie object for the compound Poisson-gamma case, 1 < p < 2. Inputs

        * frequency: mean number of claims, required
        * One of

            * sev_m: mean severity and sev_cv or
            * sev_shape and sev_rate or
            * sev_m and tw_cv (overall distribution cv)

        Gamma distribution with density f(x) = x^(shape-1) exp(-x rate) / Gamma(shape) for x > 0.

        Higher rate (Poisson rate of emission of particles per unit time) results in a lower mean
        waiting time for n to appear. Gamma(shape, rate) has mean shape / rate and variance
        shape / rate^2. Hence, cv^2 = 1 / shape.

        The object is created using p, mean, and dispersion in reproductive
        mode, same as ``from_p_mean_cv``.
        """
        if sev_m is not None and sev_cv is not None:
            #
            sev_shape = sev_cv ** -2.
            sev_rate = sev_shape / sev_m
        elif sev_shape is not None and sev_rate is not None:
            sev_m = sev_shape / sev_rate
        elif sev_m is not None and tw_cv is not None:
            tw_mean = frequency * sev_m
            tw_var = (tw_mean * tw_cv) ** 2
            # tw_var = freq alpha (alpha + 1) / beta^2 and alpha / beta = sev_m
            # subst for alpha and solve for beta = sev_rate
            sev_rate = tw_mean / (tw_var - frequency * sev_m ** 2)
            sev_shape = sev_m * sev_rate
        else:
            raise ValueError('Must input sev_m and sev_cv, or sev_shape and sev_rate, or sev_m and tw_cv')

        # now figure the Tweedie class reproductive parameters: have all four parameters to
        # play with
        shape = np.array(sev_shape)
        # know that sh=0 have p=2 (gamma), sh=1 have p=1 (Poisson), p >= 0
        # so p = (shape + 2) / (shape + 1)
        p = (shape + 2) / (shape + 1)
        mean = frequency * sev_m
        # variance = mean^p * dispersion = frequency * shape * (shape + 1) / sev_rate^2
        dispersion = frequency * shape * (shape + 1) * sev_rate ** -2 / mean ** p
        return Tweedie(p=p, mean=mean, dispersion=dispersion)



[docs]
    @staticmethod
    def p_to_alpha(p: float) -> float:
        """Convert p to Jorgensen's alpha, [2] eq 4.9 p 131."""
        # NumPy follows IEEE 754, where division by zero results in np.inf or -np.inf, rather
        # than raising an exception. GPT tells me this is OK practice.
        if np.isinf(p):
            return 1.
        else:
            p = np.array(p)
            return (p - 2) / (p - 1)



[docs]
    @staticmethod
    def alpha_to_p(alpha: float) -> float:
        """Convert alpha to p, [2] eq 4.10 p 131."""
        alpha = np.array(alpha)
        return (alpha - 2) / (alpha - 1)



[docs]
    @staticmethod
    def tau(p: float, theta: float) -> float:
        """
        Convert theta (canonical parameter) to mean, using tau = kappa'(theta).

        Evaluate the tau function, [2] below eq 4.12 p 131.
        Recall (1-alpha)(1-p) = -1.
        Assumes dispersion and index = 1, see additive_to_reproductive
        for more general function.
        """
        if 0 < p < 1:
            raise ValueError(f'p cannot be in (0, 1), got {p}')
        if p == 1:
            return jnp.exp(theta)
        elif np.isinf(p):
            # Cauchy, [2] p. 163
            return -jnp.log(-theta) if theta < 0 else 0.
        elif p == 0:
            # normal
            return theta
        elif (p > 2 or p < 0) and theta == 0:
            # valid cases where mean does not exist
            return np.inf
        elif theta == 0:
            raise ValueError(f'Invalid theta=0 for p={p}')
        else:
            # [2] says mu = (theta/(alpha - 1)) ** (alpha - 1)
            # but alpha - 1 = -1 / (p - 1) = 1 / (1 - p)
            # and we have ruled out p = 1
            # a = Tweedie.p_to_alpha(p)
            return ((1 - p) * theta) ** (1 / (1 - p))

                # m2 = (theta / (a - 1)) ** (a - 1)
                # if not np.isinf(p):
                #     if np.isinf(m1) and np.isinf(m2):
                #         pass
                #     else:
                #         assert abs(m1 - m2) < 1e-15, f'Tweedie.tau: {p=}, {theta=}, {m1=}, {m2=}'


[docs]
    @staticmethod
    def tau_inverse(p: float, mean: float) -> float:
        """
        Convert mean to theta (canonical parameter) using tau_inverse.

        Evaluate the inverse of the tau function, [2] below eq 4.12 p 131.
        Assumes dispersion and index = 1, see reproductive_to_additive
        for more general function.
        In GLMs tau_inverse is called the canonical link function, mapping
        the mean to the canonical parameter.
        """
        if p == 1:
            # poisson, canonical link is log
            return np.log(mean)
        elif np.isinf(p):
            return -np.exp(-mean)
        else:
            # inverse of tau function
            return mean ** (1 - p) / (1 - p)



[docs]
    @staticmethod
    def additive_to_reproductive(p: float, theta: float, index: float) -> Tuple[float, float, float]:
        """
        Convert additive parameters to reproductive parameters *for the same distribution*.

        Warning: this is not the dual distribution!

        Uses [2] eq 4.8 p. 130.
        """
        # note when p==1 (Poisson), this is still correct
        return (p,
            index * Tweedie.tau(p, theta),
            index ** (1 - p)
            )



[docs]
    @staticmethod
    def reproductive_to_additive(p: float, mean: float, dispersion: float) -> Tuple[float, float, float]:
        """
        Convert reproductive parameters to additive parameters *for the same distribution*.

        Warning: this is not the dual distribution!

        Uses [2] Exercise 4.5 and equation below 4.8 on p. 130 (that has a typo).
        """
        if p == 1:
            # Poisson
            if dispersion == 1:
                return p, np.log(mean), 1.
            else:
                raise ValueError(f'Poisson additive requires dispersion=1, not {dispersion}')
        else:
            # note slightly evil, dispersion = sigma^2 = 1 / index, so
            # re-enter the square!
            print(f'{p=}, {mean=}, {dispersion=}, {mean * dispersion ** (1 / (p - 1))=}')
            return (p,
                    Tweedie.tau_inverse(p, mean * dispersion ** (1 / (p - 1))),
                    dispersion ** (1 / (1 - p))
                    )


    @property
    def reproductive(self)  -> Tuple[float, float, float]:
        """Return the reproductive parameters: p, mean, dispersion."""
        return self.p, self.mean, self.dispersion

    @property
    def additive(self) -> Tuple[float, float, float]:
        """Return the additive parameters: frequency (Poisson mean), gamma shape and rate."""
        return self.p, self.theta, self.index

    def _po_gamma_conniptions(self):
        """Compute the various Po-gamma compound parameters."""
        # Valid only when 1 < p < 2.
        assert 1 < self.p < 2, "Only valid for 1 < p < 2 distributions."

        # for both reproductive and additive
        shape = (2 - self.p) / (self.p - 1)
        # po-gamma (both modes)
        # mean = freq * shape / rate
        # var = freq * shape * (shape + 1) / rate^2
        match self.mode:
            case Mode.REPRODUCTIVE:
                # mean = self.mean
                # var = self.dispersion * self.mean^p
                rate = (shape + 1) / (self.dispersion * self.mean ** (self.p - 1))
                frequency = self.mean * rate / shape
            case Mode.ADDITIVE:
                # self.mean is computed from p and self.theta
                # mean = self.index * self.mean
                # var = self.index * self.mean^p
                rate = (shape + 1) / (self.index * self.mean ** (self.p - 1))
                frequency = self.index * self.mean  * rate / shape
        # this is just standard gamma distribution conniptions
        sev_m = shape / rate   # noqa
        sev_cv = shape ** -0.5
        # return all options
        return frequency, sev_m, sev_cv, shape, rate  # noqa

    @property
    def po_gamma_m_cv(self) ->  Tuple[float, float, float]:
        """Return Poisson-gamma parameters, specifying gamma using mean and cv."""
        assert 1 < self.p < 2, "Only valid for 1 < p < 2 distributions."
        f, m, cv, sh, rate = self._po_gamma_conniptions()
        return f, m, cv

    @property
    def po_gamma_shape_rate(self) -> Tuple[float, float, float]:
        """Return Poisson-gamma parameters, specifying gamma using shape and rate."""
        assert 1 < self.p < 2, "Only valid for 1 < p < 2 distributions."
        f, m, cv, sh, rate = self._po_gamma_conniptions()
        return f, sh, rate

    @property
    def fz(self):
        """
        Return the frozen distribution object, reproductive or additive depending
        on how the object was initialized.

        Poisson
        --------
        Rather than a scipy.stats object, which requires index=1, use
        an Aggregate object. This allows scaling. Note, for Poisson is
        only reproductive in general. It is

            Y ~ Po(index * exp(theta)) / index

        The Aggregate is also not fussy about pdf vs pmf.

        Gamma
        ------
        Per [2] p. 89 Ga*(theta, lambda) ~ Gamma with shape lambda and
        scale -theta.

        """
        if self._fz is None:
            match self.p:
                case 1:
                    # Poisson or over-dispersed Poisson
                    match self.mode:
                        case Mode.REPRODUCTIVE:
                            # ODP, needs to be an aggregate object, Y = Z / index
                            mean = self.index * self.mean
                            temp = ss.poisson(mean)
                            mx = temp.isf(1e-12)
                            log2 = int(np.ceil(np.log2(mx))) + 1
                            assert log2 <= 20, f'log2 too large: {log2}'
                            # build agg object to include scaling
                            self._fz = build(f'agg Po{id(self)} {mean} claims dsev [{1 / self.index}] poisson', update=False)
                            # update with appropriate bs and log2
                            self._fz.update(bs=1 / self.index, log2=log2)
                        case Mode.ADDITIVE:
                            self._fz = ss.poisson(self.index * self.mean)
                            # top unnecessary complaining
                            self._fz.pdf = self._fz.pmf
                case 2:
                    # gamma
                    match self.mode:
                        case Mode.REPRODUCTIVE:
                            self._fz = ss.gamma(self.index, scale=-1 / (self.theta * self.index))
                        case Mode.ADDITIVE:
                            self._fz = ss.gamma(self.index, scale=-1 / self.theta)
                case 3:
                    # inverse Gaussian or Levy Stable 1/2
                    match self.mode:
                        case Mode.REPRODUCTIVE:
                            if self.theta < 0:
                                self._fz = ss.invgauss(self.mean / self.index, scale=self.index)
                            else:
                                # has no shape parameters
                                self._fz = ss.levy(scale=self.index ** (2. - 1.))
                        case Mode.ADDITIVE:
                            if self.theta < 0:
                                self._fz = ss.invgauss(self.mean / self.index, scale=self.index ** 2)
                            else:
                                # has no shape parameters
                                self._fz = ss.levy(scale=self.index ** 2.)
                case 0:
                    # normal
                    match self.mode:
                        case Mode.REPRODUCTIVE:
                            self._fz = ss.norm(loc=self.mean, scale=1.)
                        case Mode.ADDITIVE:
                            self._fz = ss.norm(loc=self.mean, scale=self.dispersion ** 0.5)
                case self.p if 1 < self.p < 2:
                    # Tweedie distribution
                    # get frequency and gamma parameters
                    # po_gamma_shape_rate accounts for mode of self.
                    f, s, r = self.po_gamma_shape_rate
                    self._fz = build(f'agg TW{id(self)} {f} claims sev {1/r} * gamma {s} poisson')
                case self.p if 2 < self.p < np.inf:
                    # positive extreme stable
                    assert 0 < self.alpha < 1
                    if self.theta < 0:
                        NotImplementedError('Negative theta for positive extreme stable')
                    else:
                        match self.mode:
                            case Mode.REPRODUCTIVE:
                                self._fz = ss.levy_stable(self.alpha, 1.0, loc=0, scale=self.index ** (1 / self.alpha - 1))
                            case Mode.ADDITIVE:
                                self._fz = ss.levy_stable(self.alpha, 1.0, loc=0, scale=self.index ** (1 / self.alpha))
                case self.p if np.isinf(self.p):
                    # Cauchy
                    if self.theta != 0:
                        NotImplementedError('Negative theta for Cauchy; no closed form expression, create via FourierTools')
                    else:
                        print('set up Cauchy')
                        self._fz = ss.levy_stable(self.alpha, 1.0, loc=0, scale=self.index)
                    assert self.alpha == 1
                    match self.mode:
                        case Mode.REPRODUCTIVE:
                            pass
                        case Mode.ADDITIVE:
                            pass
                case self.p if self.p < 0:
                    # extreme stable, supported on whole real line
                    assert 1 < self.alpha < 2
                    # assert self.theta <= 0, f'Invalid theta for spectrally negative extreme stable'
                    # if self.theta < 0:
                    #     NotImplementedError('Negative theta for spectrally negative extreme; no closed form expression, create via FourierTools')
                    # return this regardless of theta (and mode)!
                    self._fz = ss.levy_stable(self.alpha, -1.0, scale=self.index ** (1 / self.alpha))
                    # match self.mode:
                    #     case Mode.REPRODUCTIVE:
                    #         # raise NotImplementedError('Reproductive spectrally positive extreme stable')
                    #     case Mode.ADDITIVE:
                    #         self._fz = ss.levy_stable(self.alpha, -1.0, scale=self.index ** (1 / self.alpha))
                case _:
                    # this should really be caught elsewhere an never occur!
                    raise ValueError(f'Invalid p: {self.p}')
        return self._fz

    @property
    def ft(self):
        """Create and return FourierTools object."""
        if self._ft is None:
            # scale_mode false because chf already incorporates scale
            self._ft = FourierTools(self.chf, self.fz, scale_mode=False)
        return self._ft


[docs]
    def stats(self):
        """Empirical moments of the distribution."""
        if isinstance(self.fz, Aggregate):
            return self.fz.agg_m, self.fz.agg_var, self.fz.agg_skew
        elif self.fz:
            return self.fz.stats('mvs')
        else:
            return np.array([np.nan]*3)



[docs]
    def audit(self):
        """
        Audit dataframe comparing definition, Fourier, and frozen stats.

        Moment code from aggregate xsden_to_meancvskew.
        """
        if self.ft._df is None:  # noqa
            raise ValueError('Must compute before estimating stats!')
        xs = np.array(self.ft.df.index)
        den = self.ft.df.p.values
        pg = 1 - den.sum()
        xd = xs * den
        if isinstance(xs, np.ndarray):
            xsm = xs[-1]
            bs = xs[1] - xs[0]
        elif isinstance(xs, pd.Series):
            xsm = xs.iloc[-1]
            bs = xs.iloc[1] - xs.iloc[0]
        else:
            _ = np.array(xs)
            xsm = _[-1]
            bs = _[1] - _[0]
        xsm = xsm + bs
        ex1 = np.sum(xd) + pg * xsm
        xd *= xs
        ex2 = np.sum(xd) + pg * xsm ** 2
        ex3 = np.sum(xd * xs) + pg * xsm ** 3
        vf = ex2 - ex1 ** 2
        # convert to cv and skew
        mw = MomentWrangler()
        mw.noncentral = ex1, ex2, ex3
        me, cve, ske = mw.mcvsk
        m, v, sk = self.stats()
        cv = v ** .5 / m
        # moments from the definition
        if self.mode == Mode.REPRODUCTIVE:
            am = self.mean
            va = self.dispersion * self.mean ** self.p
        else:
            am = self.mean * self.index
            va = self.index * self.mean ** self.p
        cva = va ** .5 / am
        # skewness IDLE grad
        f = lambda x: self.tau(self.p, x)
        if self.mode == Mode.REPRODUCTIVE:
            ska = grad(grad(f))(self.theta) * self.index ** -2 / self.tw_variance ** 1.5
        elif self.mode == Mode.ADDITIVE:
            ska = grad(grad(f))(self.theta) * self.index / self.tw_variance ** 1.5
        # assemble answer
        actual = 'reproductive' if self.mode == Mode.REPRODUCTIVE else 'additive'
        ans = pd.DataFrame({actual: [am, va, cva, ska], 'fourier': [me, vf, cve, ske], 'frozen': [m, v, cv, sk]},
                          index=['EX', 'var', 'CV', 'Skew'])
        ans['abs err'] = (ans.fourier - ans.frozen).abs()
        ans['rel err'] = ans['abs err'] / ans['frozen']
        return ans



[docs]
    def kappa(self, theta: float):
        """
        Cumulant function of the carrier distribution.

        Jorgensen [2] equation 4.13, p. 131.
        """
        if self.p == 1:
            return np.exp(theta)
        elif self.p == 2:
            return -np.log(-theta)
        elif np.isinf(self.p):
            # [2] p. 163
            # print(theta - theta * np.log(-theta))
            return np.where(theta==0, 0,
                            theta - theta * np.log(-theta))
        else:
            # per above case are guaranteed alpha != 1
            assert self.alpha != 1, "WTF??"
            return ((self.alpha - 1) / self.alpha *
                    (theta / (self.alpha - 1)) ** self.alpha)



[docs]
    def K_additive(self, s):
        """
        Cumulant function, reflecting theta and in additive form.

         See [2] eq. 3.4 and eq 4.15.
         """
        return self.index * (self.kappa(self.theta + s) - self.kappa(self.theta))



[docs]
    def K_reproductive(self, s):
        """
        Cumulant function, reflecting theta and in additive form.

        See, [2] eq. 3.5 and eq 4.15.
        """
        return self.index * (self.kappa(self.theta + s / self.index) - self.kappa(self.theta))



[docs]
    def K(self, s):
        """Cumulant function, reflecting how the class was initialized."""
        match self.mode:
            case Mode.REPRODUCTIVE:
                return self.K_reproductive(s)
            case Mode.ADDITIVE:
                return self.K_additive(s)


    # def K_manual(self, s):
    #     """
    #     Cumulant function, reflecting theta and in the additive form.
    #
    #     K(s) = lambda * (kappa(theta + s) - kappa(theta))
    #
    #     Per Jorg [2] p. 132 Eq 4.15. Note special treatment
    #     of theta = 0, since 4.15 divides by theta.
    #     Belt 'n braces tester: the same as K!
    #     """
    #     if self.theta == 0:
    #         assert self.p != 2, 'p=2 does not allow theta = 0'
    #         return self.index * (self.kappa(s) - self.kappa(0))
    #     assert self.index != 0, 'unexpected, index cannot be 0'
    #     # functions are now scaled by index
    #     if self.p == 1:
    #         return self.index * np.exp(self.theta) * (np.exp(s) - 1)
    #     elif self.p == 2:
    #         return -self.index * np.log(1 + s / self.theta)
    #     else:
    #         # p != 1, 2
    #         return self.index * self.kappa(self.theta) * ((1 + s / self.theta) ** self.alpha - 1)


[docs]
    def chf(self, t):
        """Characteristic function."""
        return np.exp(self.K(1j * t))



[docs]
    def levy_density(self, x):
        """Levy density."""
        raise NotImplementedError('Levy density not implemented')



[docs]
    @staticmethod
    def reproductive_to_tweedie_compound(p, mean, dispersion):
        """
        Convert Tweedie reproductive parameters to additive parameters for the compound
        Poisson-gamma distribution.

        This is the inverse of the Poisson-gamma conversion.
        """
        if not (1 < p < 2):
            raise ValueError(f'Only valid for 1 < p < 2 distribution, not {p=}')
        var = dispersion * mean ** p
        shape = (2 - p) / (p - 1)
        beta = var / (shape + 1)
        frequency = mean / (shape * beta)
        return frequency, shape, beta



[docs]
    def to_series(self):
        """Convert to Series."""
        mode = 'reproductive' if self.mode == Mode.REPRODUCTIVE else 'additive'
        ans = pd.Series([mode, repr(self), self.p, self.mean, self.dispersion, self.theta, self.index, self.tw_cv],
                        index=pd.Index(['mode', 'key', 'p', 'mean', 'dispersion', 'theta', 'index',
                                        'tw_cv'],
                                       name='quantity'),
                        name='value')
        return ans



[docs]
    def to_series_ex(self):
        """As to_series but includes Tweedie compound parameters."""
        if not (1 < self.p < 2):
            raise ValueError(f'Only valid for 1 < p < 2 distribution, not {self.p=}')
        frequency, sev_m, sev_cv, shape, rate  = self._po_gamma_conniptions()
        ans2 = pd.Series([frequency, sev_m, sev_cv, shape, rate],
                         index=['frequency', 'sev_m', 'sev_cv', 'shape', 'rate'],
                         name='quantity')
        return pd.concat([self.to_series(), ans2])



[docs]
    def to_tex(self):
        df = self.to_frame().rename(index=self._renamer)
        df['parameterization'] = ['Reproductive'] * 3 + ['Additive'] * 2  + ['Tw stats']
        df = df.reset_index(drop=False).set_index(['parameterization', 'quantity'])
        return df



[docs]
    def to_frame(self):
        """Convert to DataFrame."""
        return self.to_series().to_frame()



[docs]
    def to_json(self):
        return self.to_series().to_dict()



[docs]
    def to_decl(self, name='TW'):
        """Parameters needed to feed to Aggregate using tweedie keyword."""
        if self.mode == Mode.REPRODUCTIVE:
            return f'agg {name} tweedie {self.mean} {self.p} {self.dispersion}'
        else:
            raise NotImplementedError('Additive decl not yet implemented')


    def __repr_html__(self):
        return self.to_frame().to_html()

    def __repr__(self):
        if self.mode == Mode.ADDITIVE:
            return f'Tweedie*(p={self.p:.3g}, θ={self.theta:.3g}, λ={self.index:.3g})'
        elif self.mode == Mode.REPRODUCTIVE:
            return f'Tweedie(p={self.p:.3g}, μ={self.mean:.3g}, σ^2={self.dispersion:.3g}; θ={self.theta:.3g})'
        else:
            return f'WTF is that...mode={self.mode}'

    # def __str__(self):
    #     star = '*' if self.mode == Mode.ADDITIVE else ''
    #     return f'Tweedie{star}(p={self.p:.3g}, μ={self.mean:.3g}, sigma2={self.dispersion:.3g}, θ={self.theta:.3g})'

    def _repr_mimebundle_(self, include=None, exclude=None):  # noqa
        return {
            # "application/json": {"key": "value"},
            "text/html": self.to_frame().to_html(),
            "text/plain": repr(self)
        }


[docs]
    def test(self, bs=None, x_min=0, log2=16, s=1e-8, plot=True, simpson=False):
            """Test the Tweedie class object."""
            n = 1 << log2
            if bs is None:
                bs = (self.ft.fz.ppf(1-s) - x_min) / n
                bs = 2 ** np.round(np.log2(bs), 0)
                print(f'{bs=}, {1/bs=}')
            self.ft.invert(log2, x_min=x_min, bs=bs)
            if self.ft.fz is not None:
                self.ft.compute_exact()
            if plot:
                self.ft.plot(suptitle=self.name, verbose=False)
                # for ax in self.ft.last_fig.axes[:2]:
                #     l, u = ax.get_xlim()
                #     ax.set(xlim=[-u/20, u])
            else:
                if plot:
                    self.ft.df.plot(figsize=(3, 2.25))
            if simpson:
                self.ft.invert_simpson(log2=log2, x_min=x_min, bs=bs)
                ax = self.ft.df.plot(figsize=(3,2.25), logy=True, title='2 methods')
                if self.ft.fz is not None:
                    self.ft.df_exact.plot(ax=ax)
            return self.audit()





[docs]
def make_test_suite(mode=Mode.REPRODUCTIVE):
    """Create a test suite for the Tweedie class."""
    p = Path(r'C:\Users\steve\S\TELOS\Blog\quarto\ConvexConsiderations\posts\notes\2025-02-06-Tweedie-distributions\test-cases.csv')
    assert p.exists()
    tests = pd.read_csv(p)
    # convert str to list:
    tests.theta = tests.theta.map(eval)
    tests['index'] = tests['index'].map(eval)
    # explode lists
    tests_ex = tests.explode('theta').explode('index').reset_index(drop=True)
    tests_ex.index.name = 'n'
    # convert to floats
    for c in ['p', 'alpha', 'theta', 'index']:
        tests_ex[c] = [eval(i) if type(i)==str else float(i)  for i in tests_ex[c]]

    # figure reproductive parameters: tau is not vectorized
    tests_ex['mu'] = [Tweedie.tau(p, t) for p, t in tests_ex[['p', 'theta']].values]
    tests_ex['dispersion'] = 1 / tests_ex['index']

    # figure expected repro moments
    tests_ex['mean_reproductive'] = tests_ex['mu']
    tests_ex['variance_reproductive'] = tests_ex['mu'] ** tests_ex['p'] * tests_ex['dispersion']
    tests_ex['sd'] = tests_ex.variance_reproductive ** 0.5
    tests_ex['cv'] = tests_ex.sd / tests_ex['mean_reproductive']
    tests_ex = tests_ex.drop(columns=['sd', 'notes'])

    # expected additive moments
    tests_ex['mean_additive'] = tests_ex['mu'] * tests_ex['index']
    tests_ex['var_additive'] = tests_ex['index'] * tests_ex['mu'] ** tests_ex['p']
    tests_ex['cv_additive'] = tests_ex.var_additive ** 0.5 / tests_ex.mean_additive
    return tests_ex




[docs]
def run_test(p, theta, index, log2=16, bs=None, s=1e-12, **kwargs):
    """Run a test of the Tweedie class."""
    # additive
    ta = Tweedie(p, theta=theta, index=index)
    print(ta)
    ans = ta.test(bs=bs, log2=log2, s=s, **kwargs)
    if 1 < p < 2:
        f = ta.po_gamma_shape_rate
        print(f, ta.po_gamma_m_cv, sep='\n')
        print(np.exp(-f[0]))
    display(ans)
    print('='*80)
    # gamma reproductive ====================================
    tr = ta.dual()
    print(tr)
    ans = tr.test(bs=bs, log2=log2, s=s, **kwargs)
    if 1 < p < 2:
        f = tr.po_gamma_shape_rate
        print(f, tr.po_gamma_m_cv, sep='\n')
        print(np.exp(-f[0]))
    display(ans)
    return ta, tr




[docs]
def tweedie_illustration():
    """Make the usual graph of Tweedie distributions."""
    # redo with Jorg sign for alpha
    alpha = np.linspace(-4, 2, 101)
    p = (-alpha+2) / (-alpha+1)

    f, ax = plt.subplots(figsize=(5,5))
    # want to cut out the vertical line and color code by type of distribution
    idx = (1 <= p) * (p < 2)
    ax.plot(alpha[idx], p[idx], lw=3, c='C0')
    idx = 2 <= p
    ax.plot(alpha[idx], p[idx], lw=3, c='C1')
    idx = p <= 0
    ax.plot(alpha[idx], p[idx], lw=3, c='C2')

    # asymptotes
    ax.axhline(1, c='g', lw=.5, ls='--')
    ax.axhline(2, c='g', lw=.5, ls='--')
    ax.axvline(1, c='g', lw=.5, ls='--')

    # axes
    ax.axvline(0, c='k', lw=.25)
    ax.axhline(0, c='k', lw=.25)

    # ticks and axis labels
    ax.set(xlabel='$\\alpha=(p-2)/(p-1)$,  jump tail density $\\propto e^{\\theta x}/x^{\\alpha + 1}$', #  base jump density is $x^{\\bar\\alpha}$',
           ylabel='Variance power function $p$, $V(\\mu)=\\mu^p$, $p=(\\alpha-2)/(\\alpha-1)$',
           ylim=[-5,10.3], xlim=[-5., 2.5])
    ax.yaxis.set_major_locator(ticker.FixedLocator(np.arange(-4,11,1)))
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: f'{x:.0f}')) #  if int(x)%2==0 or x==3 else '' ))
    # ax.yaxis.set_minor_locator(ticker.FixedLocator(np.arange(-4,10,0.5)))
    # ax.xaxis.set_minor_locator(ticker.FixedLocator(np.arange(-2,3,0.5)))

    # annotations
    def ql(x, y, t, dot=True, ha='right', va='bottom'):
        """Handy annotator function."""
        voff = {'bottom': 0.2, 'top': -.2, 'center': 0.}
        hoff = {'left': 0.2, 'right': -0.2, 'center': 0.}
        ax.text(x + hoff[ha], y + voff[va], t, ha=ha, va=va)
        if dot:
            ax.plot(x,y, 'ko', ms=5)

    ql(2,    0, 'Gaussian', ha='center')
    ql(1,   10, 'Landau\nas $p\\to\\infty$', ha='left', va='top')
    ql(.5,   3, 'Lévy stable 3/2\ninv Gaussian', ha='right')
    ql(0,    2, 'Gamma')
    ql(-4.5, 1, 'Poisson\n$\\alpha\\to-\\infty$', va='top', ha='center')

    ql(1.25, -3, 'Extreme\nstable,\n$\\theta ≥ 0$', False, ha='left', va='center')
    ql(0.5, -3, 'Positive\nextreme\nstable,\n$\\theta ≤ 0$', False, ha='center', va='center')
    ql(-2,  -3, 'Tweedie,\n$\\theta < 0$', False, ha='center', va='center')

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)