Unverified Commit 7f307b91 authored by Luciano Paz, committed by GitHub

Cap the values that Beta.random can generate. (#3924)

* TST: Added test for the failing sampler

* BUG: Make Beta.random use the clipped random number generator

* DOC: Added to release notes

* Fix the PR link in the release notes

* FIX: use scipy.stats and change ref_rand to point to clipped_beta_rvs

* FIX: Use np.maximum and np.minimum to work with scalars and arrays
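In essence, the change swaps the raw `scipy.stats.beta.rvs` draw inside `Beta.random` for a clipped variant. Below is a minimal sketch of the idea, assuming only NumPy and SciPy are available; the function name here is illustrative, while the real helper added in this PR is `pymc3.distributions.dist_math.clipped_beta_rvs` (shown later in the diff):

```python
import numpy as np
import scipy.stats


def clipped_beta_sketch(a, b, size=None, dtype="float64"):
    """Sketch: keep beta draws strictly inside the open interval (0, 1)."""
    # Nearest representable neighbours of 0 and 1 for the requested precision.
    lower = np.nextafter(0, 1, dtype=dtype)
    upper = np.nextafter(1, 0, dtype=dtype)
    out = scipy.stats.beta.rvs(a, b, size=size).astype(dtype)
    # np.maximum/np.minimum handle both scalar and array outputs,
    # which is what the last commit message above refers to.
    return np.maximum(np.minimum(out, upper), lower)
```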
parent e47b98a9
@@ -25,6 +25,7 @@
- End of sampling report now uses `arviz.InferenceData` internally and avoids storing
  pointwise log likelihood (see [#3883](https://github.com/pymc-devs/pymc3/pull/3883)).
- The multiprocessing start method on MacOS is now set to "forkserver", to avoid crashes (see issue [#3849](https://github.com/pymc-devs/pymc3/issues/3849), solved by [#3919](https://github.com/pymc-devs/pymc3/pull/3919)).
- Forced the `Beta` distribution's `random` method to generate samples that are in the open interval $(0, 1)$, i.e. no value can be equal to zero or equal to one (issue [#3898](https://github.com/pymc-devs/pymc3/issues/3898) fixed by [#3924](https://github.com/pymc-devs/pymc3/pull/3924)).
### Deprecations
- Remove `sample_ppc` and `sample_ppc_w` that were deprecated in 3.6.
...
@@ -32,6 +32,7 @@ from ..math import invlogit, logit, logdiffexp
from .dist_math import (
    alltrue_elemwise, betaln, bound, gammaln, i0e, incomplete_beta, logpow,
    normal_lccdf, normal_lcdf, SplineWrapper, std_cdf, zvalue,
    clipped_beta_rvs,
)
from .distribution import (Continuous, draw_values, generate_samples)
@@ -1290,7 +1291,7 @@ class Beta(UnitContinuous):
        """
        alpha, beta = draw_values([self.alpha, self.beta],
                                  point=point, size=size)
-        return generate_samples(stats.beta.rvs, alpha, beta,
+        return generate_samples(clipped_beta_rvs, alpha, beta,
                                dist_shape=self.shape,
                                size=size)
...
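A quick way to exercise the changed sampler from the user-facing API (a hedged usage sketch, not part of the diff): with extreme `alpha`/`beta` the old sampler could return exact 0.0 or 1.0 (issue #3898), while the clipped sampler never does.

```python
import numpy as np
import pymc3 as pm

# Beta(0.01, 0.01) piles its mass up against 0 and 1; before this fix,
# some draws could land exactly on the boundary.
samples = pm.Beta.dist(alpha=0.01, beta=0.01).random(size=100000)
assert samples.min() > 0 and samples.max() < 1
```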
@@ -19,6 +19,7 @@ Created on Mar 7, 2011
'''
import numpy as np
import scipy.linalg
import scipy.stats
import theano.tensor as tt
import theano
from theano.scalar import UnaryScalarOp, upgrade_to_float_no_complex
@@ -33,6 +34,11 @@ from pymc3.theanof import floatX
f = floatX
c = - .5 * np.log(2. * np.pi)
_beta_clip_values = {
    dtype: (np.nextafter(0, 1, dtype=dtype), np.nextafter(1, 0, dtype=dtype))
    for dtype in ["float16", "float32", "float64", "float128"]
}
def bound(logp, *conditions, **kwargs):
@@ -548,3 +554,43 @@ def incomplete_beta(a, b, value):
        tt.and_(tt.le(b * value, one), tt.le(value, 0.95)),
        ps,
        t))
def clipped_beta_rvs(a, b, size=None, dtype="float64"):
    """Draw beta distributed random samples in the open :math:`(0, 1)` interval.

    The samples are generated with ``scipy.stats.beta.rvs``, but any value that
    is equal to 0 or 1 will be shifted towards the next floating point in the
    interval :math:`[0, 1]`, depending on the floating point precision that is
    given by ``dtype``.

    Parameters
    ----------
    a : float or array_like of floats
        Alpha, strictly positive (>0).
    b : float or array_like of floats
        Beta, strictly positive (>0).
    size : int or tuple of ints, optional
        Output shape. If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn. If size is ``None`` (default),
        a single value is returned if ``a`` and ``b`` are both scalars.
        Otherwise, ``np.broadcast(a, b).size`` samples are drawn.
    dtype : str or dtype instance
        The floating point precision that the samples should have. This also
        determines the value that will be used to shift any samples returned
        by the numpy random number generator that are zero or one.

    Returns
    -------
    out : ndarray or scalar
        Drawn samples from the parameterized beta distribution. The scipy
        implementation can yield values that are equal to zero or one. We
        assume the support of the Beta distribution to be in the open interval
        :math:`(0, 1)`, so we shift any sample that is equal to 0 to
        ``np.nextafter(0, 1, dtype=dtype)`` and any sample that is equal to 1
        is shifted to ``np.nextafter(1, 0, dtype=dtype)``.
    """
    out = scipy.stats.beta.rvs(a, b, size=size).astype(dtype)
    lower, upper = _beta_clip_values[dtype]
    return np.maximum(np.minimum(out, upper), lower)
\ No newline at end of file
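For reference, the per-dtype bounds stored in `_beta_clip_values` are simply the closest representable neighbours of 0 and 1. A small illustrative check (not part of the PR; `float128` is skipped because it is not available on every platform):

```python
import numpy as np

for dtype in ["float16", "float32", "float64"]:
    lower = np.nextafter(0, 1, dtype=dtype)
    upper = np.nextafter(1, 0, dtype=dtype)
    print(dtype, lower, upper)
# Roughly: float16 -> (6e-08, 0.9995), float32 -> (1e-45, 0.99999994),
# float64 -> (5e-324, 0.9999999999999999)
```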
@@ -24,7 +24,9 @@ import pytest
from ..theanof import floatX
from ..distributions import Discrete
from ..distributions.dist_math import (
-    bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper, i0e)
+    bound, factln, alltrue_scalar, MvNormalLogp, SplineWrapper, i0e,
    clipped_beta_rvs,
)
def test_bound():
@@ -216,3 +218,11 @@ class TestI0e:
        utt.verify_grad(i0e, [-2.])
        utt.verify_grad(i0e, [[0.5, -2.]])
        utt.verify_grad(i0e, [[[0.5, -2.]]])
@pytest.mark.parametrize("dtype", ["float16", "float32", "float64", "float128"])
def test_clipped_beta_rvs(dtype):
    # Verify that the samples drawn from the beta distribution are never
    # equal to zero or one (issue #3898)
    values = clipped_beta_rvs(0.01, 0.01, size=1000000, dtype=dtype)
    assert not (np.any(values == 0) or np.any(values == 1))
\ No newline at end of file
@@ -22,6 +22,7 @@ import numpy.random as nr
import theano
import pymc3 as pm
from pymc3.distributions.dist_math import clipped_beta_rvs
from pymc3.distributions.distribution import (draw_values,
                                              _DrawValuesContext,
                                              _DrawValuesContextBlocker)
@@ -548,7 +549,7 @@ class TestScalarParameterSamples(SeededTest):
    def test_beta(self):
        def ref_rand(size, alpha, beta):
-            return st.beta.rvs(a=alpha, b=beta, size=size)
+            return clipped_beta_rvs(a=alpha, b=beta, size=size)
        pymc3_random(pm.Beta, {'alpha': Rplus, 'beta': Rplus}, ref_rand=ref_rand)

    def test_exponential(self):
...