Source code for srxraylib.util.inverse_method_sampler

"""
 Classes for creating random points following a given numeric distribution using the inverse method. Covers 1D, 2D and 3D sampling.

 See tests for examples of use.
"""

__authors__ = ["M Sanchez del Rio - ESRF ISDD Advanced Analysis and Modelling"]
__license__ = "MIT"
__date__ = "27/08/2018"

import numpy

[docs]class Sampler1D(object): """ Constructor. Parameters ---------- pdf : numpy array 1D input probability distrubution function. pdf_x : numpy array the abscissas of the odf. cdf_interpolation_factor : float, optional interpolation factor for calculating the cdf (1 makes no interpolation)/ """ def __init__(self, pdf, pdf_x=None, cdf_interpolation_factor=1): self._pdf = pdf if pdf_x is None: self._set_default_pdf_x() else: self._pdf_x = pdf_x self._cdf_interpolation_factor = cdf_interpolation_factor self._cdf_calculate() # defines self._cdf and self._cdf_x
[docs] def pdf(self): """ Gets the array with probability distribution function (pdf). Returns ------- numpy array The pdf array (referenced, not copied). """ return self._pdf
[docs] def abscissas(self): """ Gets the abscissas array. Returns ------- numpy array The abscissas array (referenced, not copied). """ return self._pdf_x
[docs] def cdf(self): """ Gets the cumulative distribution function (cdf). Returns ------- numpy array The cdf (referenced, not copied). """ return self._cdf
[docs] def cdf_abscissas(self): """ Gets the abscissas of the cumulative distribution function (cdf). Returns ------- numpy array The cdf abscissas (referenced, not copied). """ return self._cdf_x
[docs] def get_sampled(self, random_in_0_1): """ Return an array with sampled points. Parameters ---------- random_in_0_1 : float or numpy array Points sampled in a uniform interval. Returns ------- numpy array the points sampled with the current pdf. The number of points is equal to the dimension of random_in_0_1. """ y = numpy.array(random_in_0_1) if y.size > 1: x_rand_array = numpy.zeros_like(random_in_0_1) for i,cdf_rand in enumerate(random_in_0_1): ival,idelta,pendent = self._get_index(cdf_rand) x_rand_array[i] = self._pdf_x[ival] + idelta*(self._pdf_x[1]-self._pdf_x[0]) return x_rand_array else: ival,idelta,pendent = self._get_index(random_in_0_1) return self._pdf_x[ival] + idelta*(self._pdf_x[1]-self._pdf_x[0])
[docs] def get_sampled_and_histogram(self, random_in_0_1, bins=51, range=None): """ Return an array with sampled points and the histogram. Parameters ---------- random_in_0_1 : float or numpy array Points sampled in a uniform interval. bins : int, optional Number of bins range : list or tuple [min, max] the histogram limits. Returns ------- tuple (s1, h, bin_edges) s1: the points sampled with the current pdf. The number of points is equal to the dimension of random_in_0_1, h: the array with the histogram values at the bin edges, bin_edges: the bin edges. """ s1 = self.get_sampled(random_in_0_1) if range is None: range = [self._pdf_x.min(),self._pdf_x.max()] # # histogram # h, bin_edges = numpy.histogram(s1,bins=bins,range=range) return s1, numpy.array(h), numpy.array(bin_edges)
[docs] def get_n_sampled_points(self, npoints, seed=None): """ Returns a given number points sampled points sampled with the pdf. Parameters ---------- npoints : int The number of points. seed : int, optional The seed (numpy generator is initialized with numpy.random.default_rng(seed)) Returns ------- numpy array The sampled points. """ if not seed is None: rng = numpy.random.default_rng(seed) cdf_rand_array = rng.random(npoints) else: cdf_rand_array = numpy.random.random(npoints) return self.get_sampled(cdf_rand_array)
[docs] def get_n_sampled_points_and_histogram(self, npoints, bins=51, range=None, seed=None): """ Returns a given number points sampled points sampled with the pdf and the histogram. Parameters ---------- npoints : int The number of points. seed : int, optional The seed (numpy generator is initialized with numpy.random.default_rng(seed)) bins : int, optional Number of bins range : list or tuple [min, max] the histogram limits. Returns ------- tuple (s1, h, bin_edges) s1: the points sampled with the current pdf. The number of points is equal to the dimension of random_in_0_1, h: the array with the histogram values at the bin edges, bin_edges: the bin edges. """ if not seed is None: rng = numpy.random.default_rng(seed) cdf_rand_array = rng.random(npoints) else: cdf_rand_array = numpy.random.random(npoints) return self.get_sampled_and_histogram(cdf_rand_array,bins=bins,range=range)
def _set_default_pdf_x(self): self._pdf_x = numpy.arange(self._pdf.size) def _cdf_calculate(self): if self._cdf_interpolation_factor != 1: xx = numpy.linspace(self.abscissas()[0],self.abscissas()[-1],self.abscissas().size*2) yy = numpy.interp(xx,self.abscissas(),self.pdf()) self._cdf = numpy.cumsum(yy) self._cdf_x = xx else: self._cdf = numpy.cumsum(self._pdf) self._cdf_x = self._pdf_x.copy() self._cdf -= self._cdf[0] if self._cdf.max() != 0.0: self._cdf /= self._cdf.max() def _get_index(self,edge): try: ix = numpy.nonzero(self._cdf >= edge)[0][0] except: ix = 0 if ix > 0: ix -= 1 if ix >= (self._cdf.size - 1): pendent = 0.0 delta = 0.0 else: pendent = self._cdf[ix + 1] - self._cdf[ix] delta = (edge - self._cdf[ix]) / pendent return ix//self._cdf_interpolation_factor,delta,pendent
[docs]class Sampler2D(object): """ Constructor. Parameters ---------- pdf : numpy array the 2D pdf. pdf_x0 : numpy array A 1D array with the abscissas for axis 0. pdf_x1 : numpy array A 1D array with the abscissas for axis 1. """ def __init__(self, pdf, pdf_x0=None, pdf_x1=None): self._pdf = pdf if pdf_x0 is None: self._pdf_x0 = numpy.arange(self._pdf.shape[0]) else: self._pdf_x0 = pdf_x0 if pdf_x1 is None: self._pdf_x1 = numpy.arange(self._pdf.shape[1]) else: self._pdf_x1 = pdf_x1 self._cdf2,self._cdf1 = self._cdf_calculate()
[docs] def pdf(self): """ Gets the array with probability distribution function (pdf). Returns ------- numpy array The pdf array (referenced, not copied). """ return self._pdf
[docs] def cdf(self): """ Gets the array with cumulated distribution function (cdf). Returns ------- numpy array The cdf array (referenced, not copied). """ return self._cdf2,self._cdf1
[docs] def abscissas(self): """ Gets the abscisas arrays. Returns ------- tuple (x0, x1) The arrays for axes 0 and 1. """ return self._pdf_x0,self._pdf_x1
[docs] def get_sampled(self, random0, random1): """ Samples a point or multiple points in 2D (two coordinates) following the given pdf. Parameters ---------- random0 : float or numpy array The 1D array with values unifiormly samples in [0,1] random1 : float or numpy array The 1D array with values unifiormly samples in [0,1] Returns ------- tuple (x,y) the coordinates x (float or array) and y (float or array) of the sampled point(s). """ y0 = numpy.array(random0) y1 = numpy.array(random1) if y0.size > 1: x0_rand_array = numpy.zeros_like(y0) x1_rand_array = numpy.zeros_like(y1) for i,cdf_rand0 in enumerate(y0): ival,idelta,pendent = self._get_index0(cdf_rand0) x0_rand_array[i] = self._pdf_x0[ival] + idelta*(self._pdf_x0[1]-self._pdf_x0[0]) ival1,idelta1,pendent1 = self._get_index1(y1[i],ival+1) # <==================== changed to ival+1 x1_rand_array[i] = self._pdf_x1[ival1] + idelta1*(self._pdf_x1[1]-self._pdf_x1[0]) return x0_rand_array, x1_rand_array else: pass # TODO make scalar case
[docs] def get_sampled_x2(self, random0, random10, random11): """ Samples a point or multiple points in 2D (two coordinates) following the given pdf. It samples one point in axis 0 and two points on the axis 1. Parameters ---------- random0 : float or numpy array The 1D array with values unifiormly samples in [0,1] random10 : float or numpy array The 1D array with values unifiormly samples in [0,1] random11 : float or numpy array The 1D array with values unifiormly samples in [0,1] Returns ------- tuple (x, y0, y1) the coordinates x (float or array) on the axis 0 and and y0, y1 (float or array) of the sampled point(s) in axis 1. """ y0 = numpy.array(random0) y10 = numpy.array(random10) y11 = numpy.array(random11) if y0.size > 1: x0_rand_array = numpy.zeros_like(y0) x10_rand_array = numpy.zeros_like(y10) x11_rand_array = numpy.zeros_like(y11) for i,cdf_rand0 in enumerate(y0): ival, idelta, pendent = self._get_index0(cdf_rand0) x0_rand_array[i] = self._pdf_x0[ival] + idelta*(self._pdf_x0[1]-self._pdf_x0[0]) ival10, idelta10, pendent10 = self._get_index1(y10[i], ival+1) # <==================== changed to ival+1 x10_rand_array[i] = self._pdf_x1[ival10] + idelta10 * (self._pdf_x1[1] - self._pdf_x1[0]) ival11, idelta11, pendent11 = self._get_index1(y11[i], ival+1) # <==================== changed to ival+1 x11_rand_array[i] = self._pdf_x1[ival11] + idelta11 * (self._pdf_x1[1] - self._pdf_x1[0]) return x0_rand_array, x10_rand_array, x11_rand_array else: pass # TODO make scalar case
[docs] def get_n_sampled_points(self, npoints, seed=None): """ Samples n points (two coordinates) following the given pdf. Parameters ---------- npoints : int The number of points. seed : int, optional The seed (numpy generator is initialized with numpy.random.default_rng(seed)) Returns ------- tuple (x,y) the coordinates x (float or array) and y (float or array) of the sampled point(s). """ if not seed is None: rng = numpy.random.default_rng(seed) cdf_rand_array0 = rng.random(npoints) cdf_rand_array1 = rng.random(npoints) else: cdf_rand_array0 = numpy.random.random(npoints) cdf_rand_array1 = numpy.random.random(npoints) return self.get_sampled(cdf_rand_array0, cdf_rand_array1)
[docs] def get_n_sampled_points_x2(self, npoints, seed=None): """ Samples n points (two coordinates, two times the second axis) following the given pdf. Parameters ---------- npoints : int The number of points. seed : int, optional The seed (numpy generator is initialized with numpy.random.default_rng(seed)) Returns ------- tuple (x, y0, y1) the coordinates x (float or array) on the axis 0 and and y0, y1 (float or array) of the sampled point(s) on axis 1. """ if not seed is None: rng = numpy.random.default_rng(seed) cdf_rand_array0 = rng.random(npoints) cdf_rand_array1 = rng.random(npoints) cdf_rand_array2 = rng.random(npoints) else: cdf_rand_array0 = numpy.random.random(npoints) cdf_rand_array1 = numpy.random.random(npoints) cdf_rand_array2 = numpy.random.random(npoints) return self.get_sampled_x2(cdf_rand_array0, cdf_rand_array1, cdf_rand_array2)
def _cdf_calculate(self): pdf2 = self._pdf pdf1 = pdf2.sum(axis=1) cdf2 = numpy.zeros_like(pdf2) for i in range(cdf2.shape[0]): cdf2[i,:] = numpy.cumsum(pdf2[i,:]) cdf2[i,:] -= cdf2[i,:][0] cdf2[i,:] = cdf2[i,:] / float(cdf2[i,:].max()) cdf1 = numpy.cumsum(pdf1) cdf1 -= cdf1[0] cdf1 /= cdf1.max() return cdf2,cdf1 def _get_index0(self,edge): try: ix = numpy.nonzero(self._cdf1 >= edge)[0][0] except: ix = 0 if ix > 0: ix -= 1 if ix == (self._cdf1.size-1): pendent = 0.0 delta = 0.0 else: pendent = self._cdf1[ix+1] - self._cdf1[ix] delta = (edge - self._cdf1[ix]) / pendent return ix,delta,pendent def _get_index1(self,edge,index0): try: ix = numpy.nonzero(self._cdf2[index0, :] >= edge)[0][0] except: ix = 0 if ix > 0: ix -= 1 if ix == (self._cdf2[index0,:].size-1): pendent = 0.0 delta = 0.0 else: pendent = self._cdf2[index0,(ix+1)] - self._cdf2[index0,ix] delta = (edge - self._cdf2[index0,ix]) / pendent return ix,delta,pendent
[docs]class Sampler3D(object): """ Constructor. Parameters ---------- pdf : numpy array The 3D pdf. pdf_x0 : numpy array The abscissas for axis 0. pdf_x1 : numpy array The abscissas for axis 1. pdf_x2 : numpy array The abscissas for axis 2. """ def __init__(self, pdf, pdf_x0=None, pdf_x1=None, pdf_x2=None): self._pdf = pdf if pdf_x0 is None: self._pdf_x0 = numpy.arange(self._pdf.shape[0]) else: self._pdf_x0 = pdf_x0 if pdf_x1 is None: self._pdf_x1 = numpy.arange(self._pdf.shape[1]) else: self._pdf_x1 = pdf_x1 if pdf_x2 is None: self._pdf_x2 = numpy.arange(self._pdf.shape[2]) else: self._pdf_x2 = pdf_x2 self._cdf3,self._cdf2,self._cdf1 = self._cdf_calculate()
[docs] def pdf(self): """ Gets the array with probability distribution function (pdf). Returns ------- numpy array The pdf array (referenced, not copied). """ return self._pdf
[docs] def cdf(self): """ Gets the array with cumulative distribution function (cdf). Returns ------- numpy array The cdf array (referenced, not copied). """ return self._cdf3,self._cdf2,self._cdf1
[docs] def abscissas(self): """ Gets the arrays with the abscissas. Returns ------- tuple (x0, x1, x2) The arrays with the abscissas for axes 1, 2, and 3. """ return self._pdf_x0,self._pdf_x1,self._pdf_x2
[docs] def get_sampled(self, random0, random1, random2): """ Get sampled 3D points. Parameters ---------- random0 : float or numpy array The points or points sampled uniformly in a [0,1] interval. random1 : float or numpy array The points or points sampled uniformly in a [0,1] interval. random2 : float or numpy array The points or points sampled uniformly in a [0,1] interval. Returns ------- tuple (x0,x1,x2) the coordinates (float or array) of the sampled points. """ y0 = numpy.array(random0) y1 = numpy.array(random1) y2 = numpy.array(random2) if y0.size > 1: x0_rand_array = numpy.zeros_like(y0) x1_rand_array = numpy.zeros_like(y1) x2_rand_array = numpy.zeros_like(y2) for i,cdf_rand0 in enumerate(y0): ival,idelta,pendent = self._get_index0(cdf_rand0) x0_rand_array[i] = self._pdf_x0[ival] + idelta*(self._pdf_x0[1]-self._pdf_x0[0]) ival1,idelta1,pendent1 = self._get_index1(y1[i],ival+1) # <==================== TODO: test changed to ival+1 x1_rand_array[i] = self._pdf_x1[ival1] + idelta1*(self._pdf_x1[1]-self._pdf_x1[0]) ival2,idelta2,pendent2 = self._get_index2(y2[i],ival+1,ival1+1) # <==================== TODO: test changed to ival+1,ival1+1 x2_rand_array[i] = self._pdf_x2[ival2] + idelta2*(self._pdf_x2[1]-self._pdf_x2[0]) return x0_rand_array,x1_rand_array,x2_rand_array else: pass #TODO do the scalar case
[docs] def get_n_sampled_points(self, npoints, seed=None): """ Get a given number of sampled 3D points. Parameters ---------- npoints : int The number of points. seed : int, optional The seed (numpy generator is initialized with numpy.random.default_rng(seed)) Returns ------- tuple (x,y,z) the coordinates x (float or array), y (float or array) and z (float or array) of the sampled point(s). """ if not seed is None: rng = numpy.random.default_rng(seed) cdf_rand_array0 = rng.random(npoints) cdf_rand_array1 = rng.random(npoints) cdf_rand_array2 = numpy.random.random(npoints) else: cdf_rand_array0 = numpy.random.random(npoints) cdf_rand_array1 = numpy.random.random(npoints) cdf_rand_array2 = numpy.random.random(npoints) return self.get_sampled(cdf_rand_array0, cdf_rand_array1, cdf_rand_array2)
def _cdf_calculate(self): pdf3 = self._pdf pdf2 = pdf3.sum(axis=2) pdf1 = pdf2.sum(axis=1) cdf3 = numpy.zeros_like(pdf3) cdf2 = numpy.zeros_like(pdf2) for i in range(cdf3.shape[0]): for j in range(cdf3.shape[1]): tmp = numpy.cumsum(pdf3[i,j,:]) cdf3[i, j, :] = tmp cdf3[i,j,:] -= cdf3[i,j,0] cdf3[i,j,:] = cdf3[i,j,:] / float(cdf3[i,j,:].max()) # for i in range(cdf3.shape[0]): tmp = numpy.cumsum(pdf2[i,:]) cdf2[i, :] = tmp cdf2[i,:] -= cdf2[i,0] cdf2[i,:] = cdf2[i,:] / float(cdf2[i,:].max()) # # cdf1 = numpy.cumsum(pdf1) cdf1 -= cdf1[0] cdf1 /= cdf1.max() return cdf3,cdf2,cdf1 def _get_index0(self,edge): try: ix = numpy.nonzero(self._cdf1 > edge)[0][0] except: ix = 0 if ix > 0: ix -= 1 if ix == (self._cdf1.size-1): pendent = 0.0 delta = 0.0 else: pendent = self._cdf1[ix+1] - self._cdf1[ix] delta = (edge - self._cdf1[ix]) / pendent return ix,delta,pendent def _get_index1(self,edge,index0): try: ix = numpy.nonzero(self._cdf2[index0,:] > edge)[0][0] except: ix = 0 if ix > 0: ix -= 1 if ix == (self._cdf2[index0,:].size-1): pendent = 0.0 delta = 0.0 else: pendent = self._cdf2[index0,(ix+1)] - self._cdf2[index0,ix] delta = (edge - self._cdf2[index0,ix]) / pendent return ix,delta,pendent def _get_index2(self,edge,index0,index1): try: ix = numpy.nonzero(self._cdf3[index0, index1, :] > edge)[0][0] except: ix = 0 if ix > 0: ix -= 1 if ix == (self._cdf3[index0,index1,:].size-1): pendent = 0.0 delta = 0.0 else: pendent = self._cdf3[index0,index1,(ix+1)] - self._cdf3[index0,index1,ix] delta = (edge - self._cdf3[index0,index1,ix]) / pendent return ix,delta,pendent