Source code for aggregate.extensions.tweedie

from enum import Enum
try:
    from jax import grad     # EXTREMELY idle...
    import jax.numpy as jnp
except:
    pass
import logging
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import scipy.stats as ss
from typing import Optional, Tuple  # , Union, Any  # Import necessary types
from pathlib import Path

from IPython.display import display
# from collections import namedtuple
# from numpy import ndarray, dtype

from ..distributions import Aggregate  # noqa
from ..utilities import MomentWrangler
from ..underwriter import build
from .ft import FourierTools

# logging
logger = logging.getLogger(__name__)


[docs] class Mode(Enum): """Mode of the Tweedie class.""" REPRODUCTIVE = 1 ADDITIVE = 2
[docs] class Tweedie(object): """ ``Tweedie`` is a class for working with the Tweedie class of exponential dispersion models, those with variance function V(μ) = dispersion x μ^p, where p is the power parameter and μ the mean. The Tweedie class of distributions includes: * 1 < p < 2: Tweedie distributions, a Poisson-gamma compound distributions * p = 1: Poisson distributions * p = 0: normal distributions * p < 0: spectrally positive stable distributions with index 1 < alpha < 2 * p = infinity: spectrally positive Cauchy distribution with index alpha = 1 * 2 < p < infinity: positive spectrally positive stable distributions with index 0 < alpha < 1 * p = 3 (special case): inverse Gaussian and Levy(1/2) distributions * p = 2: gamma distributions The range 0 < p < 1 is impossible and does not correspond to an exponential dispersion model. The ``Tweedie`` class provides * Translation between the reproductive and additive parameterizations. The former is used to model averages like pure premiums that combined in weighted sums and the latter models totals (sums) that are simply additive. * Create frozen ``scipy.stats``-like distributions, either explicitly as a ``scipy.stats`` object from the normal, levy_stable, levy, inverse Gaussian, or gamma distribution, or as an ``Aggregate`` object otherwise. * For Tweedie distributions it translates parameters into Poisson frequency and gamma severity, the latter as mean and CV or shape and rate. * Create a ``aggregate.extensions.ft.FourierTools`` object for Fourier analysis that provides an approximation to the distribution for those without a closed form expression (extreme and positive extreme stable, titled Cauchy, Tweedie). * The generating distribution ``c``. * The cumulant function, ``kappa``. * The tilted cumulant function, ``K`` * The characteristic function, ``chf``. * The Levy measure density, ``levy_density``, see [3] Methodology ----------- The class is initialized with either the reproductive parameters (p, mean, dispersion) or the additive parameters (p, theta, index). The class then computes the missing parameters. The class creates a frozen distribution object on demand for the additive or reproductive distribution per those provided at initialization. Methods are available to create the dual. Methods are also available to translate between additive and reproductive parameter for the same distribution (not the dual). Notation ========= The canonical parameter is always called theta, p is always p. The other parameters are named (mean, dispersion, index) or (frequency, shape, rate) or (sev_m, sev_cv). The dispersion is usually sigma^2. The index is lambda or phi (but the frequency is sometimes lambda). Notation follows Jorgensen [1] and [2]. My blog post on Tweedie uses -alpha for the shape. References ============= [1] Jørgensen, Bent. "Exponential dispersion models." Journal of the Royal Statistical Society Series B: Statistical Methodology 49.2 (1987): 127-145. [2] Jorgensen, B. (1997). The theory of dispersion models. Chapman and Hall/CRC. [3] Jørgensen, Bent, and José Raúl Martínez. "THE LÉVY—KHINCHINE REPRESENTATION OF THE TWEEDIE CLASS." Brazilian Journal of Probability and Statistics (1996): 225-233. """ _renamer = {'mean': '$\\mu$', 'p': '$p$', 'dispersion': '$\\phi$', 'theta': '$\\theta$', 'index': '$\\phi$', 'frequency': '$\\lambda$', 'shape': '$\\alpha$', 'rate': '$\\beta$', 'sev_m': '$\\mu_X$', 'sev_cv': '$\\nu_X$', 'cv': '$\\nu$', 'p0': '$\\Pr(X=0)$'} def __init__(self, p: float, *, mean: Optional[float] = None, dispersion: Optional[float] = 1, theta: Optional[float] = None, index: Optional[float] = 1): """ Initialize object from either reproductive p, mean (mu), dispersion (sigma^2) or additive p, theta (canonical), index (phi or lambda) parameters. All parameters except p, which is required must be given by name. See also the static method ``Tweedie.from_po_gamma`` to create a Tweedie distribution (1 < p < 2) from the claim count and severity parameters. Notice that alpha - 1 = -1 / (p - 1) (p. 131 after eq 4.10). See Also --------- * ``Tweedie.dual``: create the dual object. * ``Tweedie.from_p_mean_cv``: create a Tweedie object from p, mean, and cv. * ``Tweedie.from_po_gamma``: create a Tweedie object from the Poisson-gamma parameters. * ``Tweedie.from_p_mean_cv``: create a Tweedie object from p, mean, and cv. """ if 0 < p < 1: raise ValueError(f'p cannot be in (0, 1), got {p}') alpha = self.p_to_alpha(p) if mean is not None and dispersion is not None: # reproductive self.mode = Mode.REPRODUCTIVE if dispersion <= 0: raise ValueError('Dispersion must be strictly positive') # _, theta, index = self.reproductive_to_additive(p, mean, dispersion) index = 1. / dispersion # Jorg [2], p 131 equation after 4.12 if p == 1: theta = np.log(mean) else: # using (alpha - 1)(p - 1) = -1 theta = (alpha - 1) * mean ** (1 - p) elif theta is not None and index is not None: # additive self.mode = Mode.ADDITIVE if index <= 0: raise ValueError('Index must be strictly positive') # _, mean, dispersion = self.additive_to_reproductive(p, theta, index) dispersion = 1. / index # Jorg [2], p 131 equation after 4.12 if p == 1: mean = np.exp(theta) else: mean = ((1 - p) * theta) ** (alpha - 1) prefix = ('' if theta == 0 else 'tilted ') match p: case 0: self.name = "Gaussian" case 1: self.name = "Poisson" case 2: self.name = "gamma" case 3: if theta == 0: self.name = "Lévy" else: self.name = "inverse Gaussian" case p if 1 < p < 2: self.name = "Tweedie" case p if p < 0: self.name = prefix + "extreme stable" case p if p > 2: self.name = prefix + "positive extreme stable" case p if np.isinf(p): self.name = prefix + "Landau" case _: raise ValueError(f'Invalid p: {p}') # set member variables self.p = p self.alpha = alpha self.mean = mean self.dispersion = dispersion self.theta = theta self.index = index # variables prefixed tw_ refer to the created Tweedie distribution self.tw_variance = dispersion * mean ** p if self.mode == Mode.REPRODUCTIVE else index * mean ** p self.tw_sd = np.sqrt(self.tw_variance) self.tw_cv = self.tw_sd / mean self._fz = None # for the frozen distribution object self._ft = None
[docs] def dual(self): """Create the dual object.""" match self.mode: case Mode.REPRODUCTIVE: return Tweedie(self.p, theta=self.theta, index=self.index) case Mode.ADDITIVE: return Tweedie(self.p, mean=self.mean, dispersion=self.dispersion)
[docs] @staticmethod def from_p_mean_cv(p: float, mean: float, cv: float) -> "Tweedie": """ Create a Tweedie object from the parameters p, mean, and the output Tweedie cv. The dispersion is computed by equating two expressions for the variance: (cv * mean) ** 2 = dispersion * mean^p. ==> dispersion = cv **2 * mean ** (2 - p) The object is created using p, mean, and dispersion in reproductive mode. """ dispersion = cv ** 2 * mean ** (2 - p) return Tweedie(p=p, mean=mean, dispersion=dispersion)
[docs] @staticmethod def from_po_gamma(frequency: float, *, sev_m: Optional[float] = None, sev_cv: Optional[float] = None, sev_shape: Optional[float] = None, sev_rate: Optional[float] = None, tw_cv: Optional[float] = None) -> "Tweedie": """ Create a Tweedie object for the compound Poisson-gamma case, 1 < p < 2. Inputs * frequency: mean number of claims, required * One of * sev_m: mean severity and sev_cv or * sev_shape and sev_rate or * sev_m and tw_cv (overall distribution cv) Gamma distribution with density f(x) = x^(shape-1) exp(-x rate) / Gamma(shape) for x > 0. Higher rate (Poisson rate of emission of particles per unit time) results in a lower mean waiting time for n to appear. Gamma(shape, rate) has mean shape / rate and variance shape / rate^2. Hence, cv^2 = 1 / shape. The object is created using p, mean, and dispersion in reproductive mode, same as ``from_p_mean_cv``. """ if sev_m is not None and sev_cv is not None: # sev_shape = sev_cv ** -2. sev_rate = sev_shape / sev_m elif sev_shape is not None and sev_rate is not None: sev_m = sev_shape / sev_rate elif sev_m is not None and tw_cv is not None: tw_mean = frequency * sev_m tw_var = (tw_mean * tw_cv) ** 2 # tw_var = freq alpha (alpha + 1) / beta^2 and alpha / beta = sev_m # subst for alpha and solve for beta = sev_rate sev_rate = tw_mean / (tw_var - frequency * sev_m ** 2) sev_shape = sev_m * sev_rate else: raise ValueError('Must input sev_m and sev_cv, or sev_shape and sev_rate, or sev_m and tw_cv') # now figure the Tweedie class reproductive parameters: have all four parameters to # play with shape = np.array(sev_shape) # know that sh=0 have p=2 (gamma), sh=1 have p=1 (Poisson), p >= 0 # so p = (shape + 2) / (shape + 1) p = (shape + 2) / (shape + 1) mean = frequency * sev_m # variance = mean^p * dispersion = frequency * shape * (shape + 1) / sev_rate^2 dispersion = frequency * shape * (shape + 1) * sev_rate ** -2 / mean ** p return Tweedie(p=p, mean=mean, dispersion=dispersion)
[docs] @staticmethod def p_to_alpha(p: float) -> float: """Convert p to Jorgensen's alpha, [2] eq 4.9 p 131.""" # NumPy follows IEEE 754, where division by zero results in np.inf or -np.inf, rather # than raising an exception. GPT tells me this is OK practice. if np.isinf(p): return 1. else: p = np.array(p) return (p - 2) / (p - 1)
[docs] @staticmethod def alpha_to_p(alpha: float) -> float: """Convert alpha to p, [2] eq 4.10 p 131.""" alpha = np.array(alpha) return (alpha - 2) / (alpha - 1)
[docs] @staticmethod def tau(p: float, theta: float) -> float: """ Convert theta (canonical parameter) to mean, using tau = kappa'(theta). Evaluate the tau function, [2] below eq 4.12 p 131. Recall (1-alpha)(1-p) = -1. Assumes dispersion and index = 1, see additive_to_reproductive for more general function. """ if 0 < p < 1: raise ValueError(f'p cannot be in (0, 1), got {p}') if p == 1: return jnp.exp(theta) elif np.isinf(p): # Cauchy, [2] p. 163 return -jnp.log(-theta) if theta < 0 else 0. elif p == 0: # normal return theta elif (p > 2 or p < 0) and theta == 0: # valid cases where mean does not exist return np.inf elif theta == 0: raise ValueError(f'Invalid theta=0 for p={p}') else: # [2] says mu = (theta/(alpha - 1)) ** (alpha - 1) # but alpha - 1 = -1 / (p - 1) = 1 / (1 - p) # and we have ruled out p = 1 # a = Tweedie.p_to_alpha(p) return ((1 - p) * theta) ** (1 / (1 - p))
# m2 = (theta / (a - 1)) ** (a - 1) # if not np.isinf(p): # if np.isinf(m1) and np.isinf(m2): # pass # else: # assert abs(m1 - m2) < 1e-15, f'Tweedie.tau: {p=}, {theta=}, {m1=}, {m2=}'
[docs] @staticmethod def tau_inverse(p: float, mean: float) -> float: """ Convert mean to theta (canonical parameter) using tau_inverse. Evaluate the inverse of the tau function, [2] below eq 4.12 p 131. Assumes dispersion and index = 1, see reproductive_to_additive for more general function. In GLMs tau_inverse is called the canonical link function, mapping the mean to the canonical parameter. """ if p == 1: # poisson, canonical link is log return np.log(mean) elif np.isinf(p): return -np.exp(-mean) else: # inverse of tau function return mean ** (1 - p) / (1 - p)
[docs] @staticmethod def additive_to_reproductive(p: float, theta: float, index: float) -> Tuple[float, float, float]: """ Convert additive parameters to reproductive parameters *for the same distribution*. Warning: this is not the dual distribution! Uses [2] eq 4.8 p. 130. """ # note when p==1 (Poisson), this is still correct return (p, index * Tweedie.tau(p, theta), index ** (1 - p) )
[docs] @staticmethod def reproductive_to_additive(p: float, mean: float, dispersion: float) -> Tuple[float, float, float]: """ Convert reproductive parameters to additive parameters *for the same distribution*. Warning: this is not the dual distribution! Uses [2] Exercise 4.5 and equation below 4.8 on p. 130 (that has a typo). """ if p == 1: # Poisson if dispersion == 1: return p, np.log(mean), 1. else: raise ValueError(f'Poisson additive requires dispersion=1, not {dispersion}') else: # note slightly evil, dispersion = sigma^2 = 1 / index, so # re-enter the square! print(f'{p=}, {mean=}, {dispersion=}, {mean * dispersion ** (1 / (p - 1))=}') return (p, Tweedie.tau_inverse(p, mean * dispersion ** (1 / (p - 1))), dispersion ** (1 / (1 - p)) )
@property def reproductive(self) -> Tuple[float, float, float]: """Return the reproductive parameters: p, mean, dispersion.""" return self.p, self.mean, self.dispersion @property def additive(self) -> Tuple[float, float, float]: """Return the additive parameters: frequency (Poisson mean), gamma shape and rate.""" return self.p, self.theta, self.index def _po_gamma_conniptions(self): """Compute the various Po-gamma compound parameters.""" # Valid only when 1 < p < 2. assert 1 < self.p < 2, "Only valid for 1 < p < 2 distributions." # for both reproductive and additive shape = (2 - self.p) / (self.p - 1) # po-gamma (both modes) # mean = freq * shape / rate # var = freq * shape * (shape + 1) / rate^2 match self.mode: case Mode.REPRODUCTIVE: # mean = self.mean # var = self.dispersion * self.mean^p rate = (shape + 1) / (self.dispersion * self.mean ** (self.p - 1)) frequency = self.mean * rate / shape case Mode.ADDITIVE: # self.mean is computed from p and self.theta # mean = self.index * self.mean # var = self.index * self.mean^p rate = (shape + 1) / (self.index * self.mean ** (self.p - 1)) frequency = self.index * self.mean * rate / shape # this is just standard gamma distribution conniptions sev_m = shape / rate # noqa sev_cv = shape ** -0.5 # return all options return frequency, sev_m, sev_cv, shape, rate # noqa @property def po_gamma_m_cv(self) -> Tuple[float, float, float]: """Return Poisson-gamma parameters, specifying gamma using mean and cv.""" assert 1 < self.p < 2, "Only valid for 1 < p < 2 distributions." f, m, cv, sh, rate = self._po_gamma_conniptions() return f, m, cv @property def po_gamma_shape_rate(self) -> Tuple[float, float, float]: """Return Poisson-gamma parameters, specifying gamma using shape and rate.""" assert 1 < self.p < 2, "Only valid for 1 < p < 2 distributions." f, m, cv, sh, rate = self._po_gamma_conniptions() return f, sh, rate @property def fz(self): """ Return the frozen distribution object, reproductive or additive depending on how the object was initialized. Poisson -------- Rather than a scipy.stats object, which requires index=1, use an Aggregate object. This allows scaling. Note, for Poisson is only reproductive in general. It is Y ~ Po(index * exp(theta)) / index The Aggregate is also not fussy about pdf vs pmf. Gamma ------ Per [2] p. 89 Ga*(theta, lambda) ~ Gamma with shape lambda and scale -theta. """ if self._fz is None: match self.p: case 1: # Poisson or over-dispersed Poisson match self.mode: case Mode.REPRODUCTIVE: # ODP, needs to be an aggregate object, Y = Z / index mean = self.index * self.mean temp = ss.poisson(mean) mx = temp.isf(1e-12) log2 = int(np.ceil(np.log2(mx))) + 1 assert log2 <= 20, f'log2 too large: {log2}' # build agg object to include scaling self._fz = build(f'agg Po{id(self)} {mean} claims dsev [{1 / self.index}] poisson', update=False) # update with appropriate bs and log2 self._fz.update(bs=1 / self.index, log2=log2) case Mode.ADDITIVE: self._fz = ss.poisson(self.index * self.mean) # top unnecessary complaining self._fz.pdf = self._fz.pmf case 2: # gamma match self.mode: case Mode.REPRODUCTIVE: self._fz = ss.gamma(self.index, scale=-1 / (self.theta * self.index)) case Mode.ADDITIVE: self._fz = ss.gamma(self.index, scale=-1 / self.theta) case 3: # inverse Gaussian or Levy Stable 1/2 match self.mode: case Mode.REPRODUCTIVE: if self.theta < 0: self._fz = ss.invgauss(self.mean / self.index, scale=self.index) else: # has no shape parameters self._fz = ss.levy(scale=self.index ** (2. - 1.)) case Mode.ADDITIVE: if self.theta < 0: self._fz = ss.invgauss(self.mean / self.index, scale=self.index ** 2) else: # has no shape parameters self._fz = ss.levy(scale=self.index ** 2.) case 0: # normal match self.mode: case Mode.REPRODUCTIVE: self._fz = ss.norm(loc=self.mean, scale=1.) case Mode.ADDITIVE: self._fz = ss.norm(loc=self.mean, scale=self.dispersion ** 0.5) case self.p if 1 < self.p < 2: # Tweedie distribution # get frequency and gamma parameters # po_gamma_shape_rate accounts for mode of self. f, s, r = self.po_gamma_shape_rate self._fz = build(f'agg TW{id(self)} {f} claims sev {1/r} * gamma {s} poisson') case self.p if 2 < self.p < np.inf: # positive extreme stable assert 0 < self.alpha < 1 if self.theta < 0: NotImplementedError('Negative theta for positive extreme stable') else: match self.mode: case Mode.REPRODUCTIVE: self._fz = ss.levy_stable(self.alpha, 1.0, loc=0, scale=self.index ** (1 / self.alpha - 1)) case Mode.ADDITIVE: self._fz = ss.levy_stable(self.alpha, 1.0, loc=0, scale=self.index ** (1 / self.alpha)) case self.p if np.isinf(self.p): # Cauchy if self.theta != 0: NotImplementedError('Negative theta for Cauchy; no closed form expression, create via FourierTools') else: print('set up Cauchy') self._fz = ss.levy_stable(self.alpha, 1.0, loc=0, scale=self.index) assert self.alpha == 1 match self.mode: case Mode.REPRODUCTIVE: pass case Mode.ADDITIVE: pass case self.p if self.p < 0: # extreme stable, supported on whole real line assert 1 < self.alpha < 2 # assert self.theta <= 0, f'Invalid theta for spectrally negative extreme stable' # if self.theta < 0: # NotImplementedError('Negative theta for spectrally negative extreme; no closed form expression, create via FourierTools') # return this regardless of theta (and mode)! self._fz = ss.levy_stable(self.alpha, -1.0, scale=self.index ** (1 / self.alpha)) # match self.mode: # case Mode.REPRODUCTIVE: # # raise NotImplementedError('Reproductive spectrally positive extreme stable') # case Mode.ADDITIVE: # self._fz = ss.levy_stable(self.alpha, -1.0, scale=self.index ** (1 / self.alpha)) case _: # this should really be caught elsewhere an never occur! raise ValueError(f'Invalid p: {self.p}') return self._fz @property def ft(self): """Create and return FourierTools object.""" if self._ft is None: # scale_mode false because chf already incorporates scale self._ft = FourierTools(self.chf, self.fz, scale_mode=False) return self._ft
[docs] def stats(self): """Empirical moments of the distribution.""" if isinstance(self.fz, Aggregate): return self.fz.agg_m, self.fz.agg_var, self.fz.agg_skew elif self.fz: return self.fz.stats('mvs') else: return np.array([np.nan]*3)
[docs] def audit(self): """ Audit dataframe comparing definition, Fourier, and frozen stats. Moment code from aggregate xsden_to_meancvskew. """ if self.ft._df is None: # noqa raise ValueError('Must compute before estimating stats!') xs = np.array(self.ft.df.index) den = self.ft.df.p.values pg = 1 - den.sum() xd = xs * den if isinstance(xs, np.ndarray): xsm = xs[-1] bs = xs[1] - xs[0] elif isinstance(xs, pd.Series): xsm = xs.iloc[-1] bs = xs.iloc[1] - xs.iloc[0] else: _ = np.array(xs) xsm = _[-1] bs = _[1] - _[0] xsm = xsm + bs ex1 = np.sum(xd) + pg * xsm xd *= xs ex2 = np.sum(xd) + pg * xsm ** 2 ex3 = np.sum(xd * xs) + pg * xsm ** 3 vf = ex2 - ex1 ** 2 # convert to cv and skew mw = MomentWrangler() mw.noncentral = ex1, ex2, ex3 me, cve, ske = mw.mcvsk m, v, sk = self.stats() cv = v ** .5 / m # moments from the definition if self.mode == Mode.REPRODUCTIVE: am = self.mean va = self.dispersion * self.mean ** self.p else: am = self.mean * self.index va = self.index * self.mean ** self.p cva = va ** .5 / am # skewness IDLE grad f = lambda x: self.tau(self.p, x) if self.mode == Mode.REPRODUCTIVE: ska = grad(grad(f))(self.theta) * self.index ** -2 / self.tw_variance ** 1.5 elif self.mode == Mode.ADDITIVE: ska = grad(grad(f))(self.theta) * self.index / self.tw_variance ** 1.5 # assemble answer actual = 'reproductive' if self.mode == Mode.REPRODUCTIVE else 'additive' ans = pd.DataFrame({actual: [am, va, cva, ska], 'fourier': [me, vf, cve, ske], 'frozen': [m, v, cv, sk]}, index=['EX', 'var', 'CV', 'Skew']) ans['abs err'] = (ans.fourier - ans.frozen).abs() ans['rel err'] = ans['abs err'] / ans['frozen'] return ans
[docs] def kappa(self, theta: float): """ Cumulant function of the carrier distribution. Jorgensen [2] equation 4.13, p. 131. """ if self.p == 1: return np.exp(theta) elif self.p == 2: return -np.log(-theta) elif np.isinf(self.p): # [2] p. 163 # print(theta - theta * np.log(-theta)) return np.where(theta==0, 0, theta - theta * np.log(-theta)) else: # per above case are guaranteed alpha != 1 assert self.alpha != 1, "WTF??" return ((self.alpha - 1) / self.alpha * (theta / (self.alpha - 1)) ** self.alpha)
[docs] def K_additive(self, s): """ Cumulant function, reflecting theta and in additive form. See [2] eq. 3.4 and eq 4.15. """ return self.index * (self.kappa(self.theta + s) - self.kappa(self.theta))
[docs] def K_reproductive(self, s): """ Cumulant function, reflecting theta and in additive form. See, [2] eq. 3.5 and eq 4.15. """ return self.index * (self.kappa(self.theta + s / self.index) - self.kappa(self.theta))
[docs] def K(self, s): """Cumulant function, reflecting how the class was initialized.""" match self.mode: case Mode.REPRODUCTIVE: return self.K_reproductive(s) case Mode.ADDITIVE: return self.K_additive(s)
# def K_manual(self, s): # """ # Cumulant function, reflecting theta and in the additive form. # # K(s) = lambda * (kappa(theta + s) - kappa(theta)) # # Per Jorg [2] p. 132 Eq 4.15. Note special treatment # of theta = 0, since 4.15 divides by theta. # Belt 'n braces tester: the same as K! # """ # if self.theta == 0: # assert self.p != 2, 'p=2 does not allow theta = 0' # return self.index * (self.kappa(s) - self.kappa(0)) # assert self.index != 0, 'unexpected, index cannot be 0' # # functions are now scaled by index # if self.p == 1: # return self.index * np.exp(self.theta) * (np.exp(s) - 1) # elif self.p == 2: # return -self.index * np.log(1 + s / self.theta) # else: # # p != 1, 2 # return self.index * self.kappa(self.theta) * ((1 + s / self.theta) ** self.alpha - 1)
[docs] def chf(self, t): """Characteristic function.""" return np.exp(self.K(1j * t))
[docs] def levy_density(self, x): """Levy density.""" raise NotImplementedError('Levy density not implemented')
[docs] @staticmethod def reproductive_to_tweedie_compound(p, mean, dispersion): """ Convert Tweedie reproductive parameters to additive parameters for the compound Poisson-gamma distribution. This is the inverse of the Poisson-gamma conversion. """ if not (1 < p < 2): raise ValueError(f'Only valid for 1 < p < 2 distribution, not {p=}') var = dispersion * mean ** p shape = (2 - p) / (p - 1) beta = var / (shape + 1) frequency = mean / (shape * beta) return frequency, shape, beta
[docs] def to_series(self): """Convert to Series.""" mode = 'reproductive' if self.mode == Mode.REPRODUCTIVE else 'additive' ans = pd.Series([mode, repr(self), self.p, self.mean, self.dispersion, self.theta, self.index, self.tw_cv], index=pd.Index(['mode', 'key', 'p', 'mean', 'dispersion', 'theta', 'index', 'tw_cv'], name='quantity'), name='value') return ans
[docs] def to_series_ex(self): """As to_series but includes Tweedie compound parameters.""" if not (1 < self.p < 2): raise ValueError(f'Only valid for 1 < p < 2 distribution, not {self.p=}') frequency, sev_m, sev_cv, shape, rate = self._po_gamma_conniptions() ans2 = pd.Series([frequency, sev_m, sev_cv, shape, rate], index=['frequency', 'sev_m', 'sev_cv', 'shape', 'rate'], name='quantity') return pd.concat([self.to_series(), ans2])
[docs] def to_tex(self): df = self.to_frame().rename(index=self._renamer) df['parameterization'] = ['Reproductive'] * 3 + ['Additive'] * 2 + ['Tw stats'] df = df.reset_index(drop=False).set_index(['parameterization', 'quantity']) return df
[docs] def to_frame(self): """Convert to DataFrame.""" return self.to_series().to_frame()
[docs] def to_json(self): return self.to_series().to_dict()
[docs] def to_decl(self, name='TW'): """Parameters needed to feed to Aggregate using tweedie keyword.""" if self.mode == Mode.REPRODUCTIVE: return f'agg {name} tweedie {self.mean} {self.p} {self.dispersion}' else: raise NotImplementedError('Additive decl not yet implemented')
def __repr_html__(self): return self.to_frame().to_html() def __repr__(self): if self.mode == Mode.ADDITIVE: return f'Tweedie*(p={self.p:.3g}, θ={self.theta:.3g}, λ={self.index:.3g})' elif self.mode == Mode.REPRODUCTIVE: return f'Tweedie(p={self.p:.3g}, μ={self.mean:.3g}, σ^2={self.dispersion:.3g}; θ={self.theta:.3g})' else: return f'WTF is that...mode={self.mode}' # def __str__(self): # star = '*' if self.mode == Mode.ADDITIVE else '' # return f'Tweedie{star}(p={self.p:.3g}, μ={self.mean:.3g}, sigma2={self.dispersion:.3g}, θ={self.theta:.3g})' def _repr_mimebundle_(self, include=None, exclude=None): # noqa return { # "application/json": {"key": "value"}, "text/html": self.to_frame().to_html(), "text/plain": repr(self) }
[docs] def test(self, bs=None, x_min=0, log2=16, s=1e-8, plot=True, simpson=False): """Test the Tweedie class object.""" n = 1 << log2 if bs is None: bs = (self.ft.fz.ppf(1-s) - x_min) / n bs = 2 ** np.round(np.log2(bs), 0) print(f'{bs=}, {1/bs=}') self.ft.invert(log2, x_min=x_min, bs=bs) if self.ft.fz is not None: self.ft.compute_exact() if plot: self.ft.plot(suptitle=self.name, verbose=False) # for ax in self.ft.last_fig.axes[:2]: # l, u = ax.get_xlim() # ax.set(xlim=[-u/20, u]) else: if plot: self.ft.df.plot(figsize=(3, 2.25)) if simpson: self.ft.invert_simpson(log2=log2, x_min=x_min, bs=bs) ax = self.ft.df.plot(figsize=(3,2.25), logy=True, title='2 methods') if self.ft.fz is not None: self.ft.df_exact.plot(ax=ax) return self.audit()
[docs] def make_test_suite(mode=Mode.REPRODUCTIVE): """Create a test suite for the Tweedie class.""" p = Path(r'C:\Users\steve\S\TELOS\Blog\quarto\ConvexConsiderations\posts\notes\2025-02-06-Tweedie-distributions\test-cases.csv') assert p.exists() tests = pd.read_csv(p) # convert str to list: tests.theta = tests.theta.map(eval) tests['index'] = tests['index'].map(eval) # explode lists tests_ex = tests.explode('theta').explode('index').reset_index(drop=True) tests_ex.index.name = 'n' # convert to floats for c in ['p', 'alpha', 'theta', 'index']: tests_ex[c] = [eval(i) if type(i)==str else float(i) for i in tests_ex[c]] # figure reproductive parameters: tau is not vectorized tests_ex['mu'] = [Tweedie.tau(p, t) for p, t in tests_ex[['p', 'theta']].values] tests_ex['dispersion'] = 1 / tests_ex['index'] # figure expected repro moments tests_ex['mean_reproductive'] = tests_ex['mu'] tests_ex['variance_reproductive'] = tests_ex['mu'] ** tests_ex['p'] * tests_ex['dispersion'] tests_ex['sd'] = tests_ex.variance_reproductive ** 0.5 tests_ex['cv'] = tests_ex.sd / tests_ex['mean_reproductive'] tests_ex = tests_ex.drop(columns=['sd', 'notes']) # expected additive moments tests_ex['mean_additive'] = tests_ex['mu'] * tests_ex['index'] tests_ex['var_additive'] = tests_ex['index'] * tests_ex['mu'] ** tests_ex['p'] tests_ex['cv_additive'] = tests_ex.var_additive ** 0.5 / tests_ex.mean_additive return tests_ex
[docs] def run_test(p, theta, index, log2=16, bs=None, s=1e-12, **kwargs): """Run a test of the Tweedie class.""" # additive ta = Tweedie(p, theta=theta, index=index) print(ta) ans = ta.test(bs=bs, log2=log2, s=s, **kwargs) if 1 < p < 2: f = ta.po_gamma_shape_rate print(f, ta.po_gamma_m_cv, sep='\n') print(np.exp(-f[0])) display(ans) print('='*80) # gamma reproductive ==================================== tr = ta.dual() print(tr) ans = tr.test(bs=bs, log2=log2, s=s, **kwargs) if 1 < p < 2: f = tr.po_gamma_shape_rate print(f, tr.po_gamma_m_cv, sep='\n') print(np.exp(-f[0])) display(ans) return ta, tr
[docs] def tweedie_illustration(): """Make the usual graph of Tweedie distributions.""" # redo with Jorg sign for alpha alpha = np.linspace(-4, 2, 101) p = (-alpha+2) / (-alpha+1) f, ax = plt.subplots(figsize=(5,5)) # want to cut out the vertical line and color code by type of distribution idx = (1 <= p) * (p < 2) ax.plot(alpha[idx], p[idx], lw=3, c='C0') idx = 2 <= p ax.plot(alpha[idx], p[idx], lw=3, c='C1') idx = p <= 0 ax.plot(alpha[idx], p[idx], lw=3, c='C2') # asymptotes ax.axhline(1, c='g', lw=.5, ls='--') ax.axhline(2, c='g', lw=.5, ls='--') ax.axvline(1, c='g', lw=.5, ls='--') # axes ax.axvline(0, c='k', lw=.25) ax.axhline(0, c='k', lw=.25) # ticks and axis labels ax.set(xlabel='$\\alpha=(p-2)/(p-1)$, jump tail density $\\propto e^{\\theta x}/x^{\\alpha + 1}$', # base jump density is $x^{\\bar\\alpha}$', ylabel='Variance power function $p$, $V(\\mu)=\\mu^p$, $p=(\\alpha-2)/(\\alpha-1)$', ylim=[-5,10.3], xlim=[-5., 2.5]) ax.yaxis.set_major_locator(ticker.FixedLocator(np.arange(-4,11,1))) ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: f'{x:.0f}')) # if int(x)%2==0 or x==3 else '' )) # ax.yaxis.set_minor_locator(ticker.FixedLocator(np.arange(-4,10,0.5))) # ax.xaxis.set_minor_locator(ticker.FixedLocator(np.arange(-2,3,0.5))) # annotations def ql(x, y, t, dot=True, ha='right', va='bottom'): """Handy annotator function.""" voff = {'bottom': 0.2, 'top': -.2, 'center': 0.} hoff = {'left': 0.2, 'right': -0.2, 'center': 0.} ax.text(x + hoff[ha], y + voff[va], t, ha=ha, va=va) if dot: ax.plot(x,y, 'ko', ms=5) ql(2, 0, 'Gaussian', ha='center') ql(1, 10, 'Landau\nas $p\\to\\infty$', ha='left', va='top') ql(.5, 3, 'Lévy stable 3/2\ninv Gaussian', ha='right') ql(0, 2, 'Gamma') ql(-4.5, 1, 'Poisson\n$\\alpha\\to-\\infty$', va='top', ha='center') ql(1.25, -3, 'Extreme\nstable,\n$\\theta ≥ 0$', False, ha='left', va='center') ql(0.5, -3, 'Positive\nextreme\nstable,\n$\\theta ≤ 0$', False, ha='center', va='center') ql(-2, -3, 'Tweedie,\n$\\theta < 0$', False, ha='center', va='center') ax.spines['right'].set_visible(False) ax.spines['top'].set_visible(False)