using Plots
# two useful defaults: uniform random: 0 to 1.0
# gaussian: -Inf to Inf
function simple_dist()
N1 = 28000
p1 = Plots.histogram(rand(N1), normalize = true, label="rand")
Plots.histogram!(randn(N1), normalize = true, alpha=0.5, label="randn")
# sampled distribution
rho1(x) = 3 / 4 * (1 - x^2)
function _dist()
data = []
for i1 = 1:N1
x1 = -1 + 2 * rand()
# max. rho(x) is 3/4
y1 = 3/4 * rand()
# von Neumann rejection: accepts points under the line
if y1 < rho1(x1)
push!(data, x1)
else
end
end
return data
end
p2 = Plots.histogram(_dist(), normalize = true, label="random")
plot!(range(-1, 1, length=20), rho1, label="rho(x)")
plot(p1, p2)
end
simple_dist()
sampling an arbitrary distributions $\rho(x)$¶
Direct Sampling¶
from uniform random distribution $r1, r2$, collect y = max(r1, r2) to get $ \rho(y) = 2 y $
Inverse Transform Method¶
construct $r(x) = F(x) = \int^x dx^\prime \, \rho(x^\prime)$ such that
$ dr = dx \, \rho(x) $;
invert to get $x(r)$
e.g. sample $ x(r) = -\lambda \ln r $
to get
$\rho(x) = \frac{1}{\lambda} e^{-\frac{x}{\lambda}}$
Von Neumann Rejection¶
generate a pair of random numbers (x, y)
if y < rho(x), accepts x
such a collection of x satisfies dist. rho(x)
# simple examples of direct and inverse transform methods
Nmax = 4500
dist1 = []
for i1 in 1:Nmax
push!(dist1, max(rand(), rand()))
end
p1 = histogram(dist1, normalize=:pdf, alpha = 0.6, label="direct sampling")
plot!(range(0.0, 1.0, length=20), x-> 2*x)
# rejection
function reject(rho; Nx=12000, xmin=0.0, xmax=1.0, ymin=0.0, ymax=1.0)
i1 = 0
bag = []
while true
xx = xmin + rand()*(xmax-xmin)
yy = ymin + rand()*(ymax-ymin)
if yy < rho(xx)
push!(bag, xx)
i1 += 1
end
if i1 > Nx
break
end
end
return bag
end
reject (generic function with 1 method)
# random distribution by direct sampling
tau = 1.0
# using knwon result
r2x = r -> -tau *log(1-r)
N = 25000
r_arr = rand(N)
# a new random distribution is just as easy as this:
x_arr = r2x.(r_arr)
x1_arr = reject(x->exp(-x/tau)/tau, xmax=4.0)
histogram(x_arr, normalize=:pdf, alpha = 0.6, label="direct sampling")
plot!(range(0.0, 10.0, length=50), t->exp(-t/tau)/tau, xlim=(0,5), label="analytic")
histogram!(x1_arr, normalize=:pdf, alpha=0.5, label="rejection")
let
rho = x -> 3/4 * (1 - x^3)
data = reject(rho, xmin=-1, xmax=1, ymax=3/2)
# again, improve the integral routine!
xvec = range(-1, 1, 150)
dx = 2.0 / (length(xvec) - 1)
norm = sum([rho(xx+dx/2) * dx for xx in xvec[1:end-1]])
Plots.histogram(data, normalize = true, label="rejection")
Plots.plot!(range(-1, 1, length = 250), x -> rho(x) / norm, label="analytic")
end
# application
tau = 1.0
r2x = r -> -tau *log(1-r)
N = 2000
r_arr = rand(N)
# as easy as that
x_arr = r2x.(r_arr)
rho = x -> exp(-x/tau)/tau
f = x -> exp(-x^2)
ret1 = sum( @. f(x_arr)/rho(x_arr) )/N
println([ret1, sqrt(pi)/2])
[0.8875311053147379, 0.8862269254527579]
# random distribution by direct sampling
r2x = r -> tan(pi *(r-0.4))
N = 1500
r_arr = rand(N)
# as easy as that
x_arr = r2x.(r_arr)
histogram(x_arr, normalize=:pdf, label="direct sampling")
plot!(range(-10.0, 10.0, length=2500), x->1/(1+x^2)/pi, xlim=(-10,10), label="analytic")
Monte Carlo Integration¶
To calculate an integral using Monte Carlo Method:
$$ \cal{I} = \int_{a}^{b} \, dx \, f(x). $$
The simplest way is to consider a uniform distribution x_arr in a:b and compute
$$ \begin{align} \cal{I} &\approx (b-a) \, \langle f(x) \rangle \\ \langle f(x) \rangle &= \frac{1}{N} \, \sum_{x_i} \, f(x_i) \end{align} $$
The second line makes explicit an alternative way to interpret an integral: mean value theorem.
There are two ways to interpret this:
- First, an integral is given by a weighed sum:
$$ \cal{I} = \sum_i \, w_i f(x_i). $$
In this case, the weight is simply $w_i = \frac{b-a}{N}$.
- Another way to view this is to consider a sampling of a collection [x] (by rejection or direct sampling), such that
$$ d r = dx \, \rho(x). $$
Equivalently $\frac{dr}{dx} = \rho(x),$ and evaluate
$$ \cal{I} = \int \, dr \, \frac{1}{\rho(x)} \, f(x) \approx \frac{1}{N} \, \sum_{i} \, \frac{f(x_i)}{\rho(x_i)}. $$
Note that $\rho$ should be properly normalized. In the simplest case,
$$ x = a + (b-a) \times r $$
$$ \rho(x) = 1/(b-a) $$
and we recover the standard result. If not, you need to include a factor of
$${\rm norm} = \int dx \, \rho(x) $$ to get the correct result.
The nice thing is that we can now select a non-uniform distribution to improve the convergence.
"
# application
r2x = r -> tan(pi *(r-0.5))
N = 25000
r_arr = rand(N)
# as easy as that
x_arr = r2x.(r_arr)
rho = x -> 1/(1+x^2)/pi
f = x -> exp(-x^2)
ret1 = sum( @. f(x_arr)/rho(x_arr) )/N
println([ret1, sqrt(pi)])
[1.763123601681473, 1.7724538509055159]
a tricky example¶
see whether you understand this...
$${\rm norm} = \int_a^b \, dx \, \rho(x).$$
And hence the integral approximation formula becomes
$$\begin{align} \cal{I} = \int dx \, \rho(x) \, f(x) &\approx \frac{\rm norm}{N} \, {\sum_{i}}^\prime \, \frac{f(x_i)}{\rho(x_i)}\\ &= {\rm norm} \times \langle \frac{f}{\rho} \rangle. \end{align}$$
What if we choose $f(x) = {\rho(x)}^{-1}$ ?
let
# another example
rho(x) = exp(-x)
data = reject(rho; Nx = 25000, xmin = 0.0, xmax = 5.0)
ret = sum([ 1/rho(xx) for xx in data ])/length(data)
compare = (5.0)/(1-exp(-5.0))
[ret, compare]
end
2-element Vector{Float64}:
4.9558349005191795
5.033918274531521