Julia Set¶
In the notebook I present different way to accelerate python code. This is a modified version from Loic Gouarin
The test case is the computation of the Julia set wikipedia
import os, sys
if sys.platform == 'darwin':
os.environ['CC'] = 'gcc-10'
os.environ['CXX'] = 'g++-10'
import warnings
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
plt.rcParams['figure.figsize'] = (6,6)
Initialize Mesh¶
nx, ny = 512, 512 # mesh
lim, maxit = 400, 2000 # limits
vmin, vmax = 0, 200
x = np.linspace(-1.6, 1.6, nx)
y = np.linspace(-1.6, 1.6, ny)
c = -0.772691322542185 + 0.124281466072787j
Pure Python¶
def juliaset_python(x, y, c, lim, maxit):
"""
returns Julia set
"""
julia = np.zeros((x.size, y.size))
for i in range(x.size):
for j in range(y.size):
z = x[i] + 1j*y[j]
ite = 0
while abs(z) < lim and ite < maxit:
z = z**2 + c
ite += 1
julia[j, i] = ite
return julia
def plot_julia_set(julia):
plt.figure(figsize=(6,6))
plt.imshow(julia, cmap = cm.Greys, vmin=vmin, vmax=vmax)
plot_julia_set(juliaset_python(x, y, c, lim, maxit))
numba¶
Numba will accelerate the pure python function just with
the decorator @jit
. Numba does everything for you.
from numba import jit
@jit(nopython=True, parallel=True)
def juliaset_numba(x, y, c, lim, maxit):
julia = np.zeros((x.size, y.size))
lim2 = lim*lim
c = complex(c) # needed for numba
for j in range(y.size):
for i in range(x.size):
z = complex(x[i], y[j])
ite = 0
while (z.real*z.real + z.imag*z.imag) < lim2 and ite < maxit:
z = z*z + c
ite += 1
julia[j, i] = ite
return julia
plot_julia_set(juliaset_numba(x, y, c, lim, maxit))
PyJulia¶
PyJulia is a python module to import Julia function in your Python session. You can also run Julia code in a middle of a Jupyter notebook with a Python kernel.
To use pyjulia you need to install Julia and install PyCall.jl and REPL
julia> using Pkg
julia> ENV["PYTHON"] = "/usr/local/bin/python3.7"
julia> Pkg.add("PyCall")
julia> Pkg.add("REPL")
julia> Pkg.build("PyCall")
print the value of sys.executable
to know the python path. But the cell above could do the job.
import julia
julia.install()
from julia.api import Julia
jl = Julia(compiled_modules=False)
[ Info: Julia version info
Julia Version 1.6.4
Commit 35f0c911f4 (2021-11-19 03:54 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin19.5.0)
uname: Darwin 20.6.0 Darwin Kernel Version 20.6.0: Tue Oct 12 18:33:42 PDT 2021; root:xnu-7195.141.8~1/RELEASE_X86_64 x86_64 i386
CPU: Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz:
speed user nice sys idle irq
#1 3337 MHz 2127 s 0 s 1850 s 4307 s 0 s
#2 3337 MHz 2330 s 0 s 1664 s 4283 s 0 s
#3 3337 MHz 2052 s 0 s 1424 s 4802 s 0 s
Memory: 14.0 GB (4774.51171875 MB free)
Uptime: 828.0 sec
Load Avg:
2.8427734375 3.46484375 3.21826171875
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-11.0.1 (ORCJIT, ivybridge)
Environment:
GITHUB_EVENT_PATH = /Users/runner/work/_temp/_github_workflow/event.json
ANDROID_HOME = /Users/runner/Library/Android/sdk
NVM_CD_FLAGS =
JAVA_HOME_8_X64 = /Users/runner/hostedtoolcache/Java_Temurin-Hotspot_jdk/8.0.312-7/x64/Contents/Home/
NUNIT_BASE_PATH = /Library/Developer/nunit
NUNIT3_PATH = /Library/Developer/nunit/3.6.0
JAVA_HOME_11_X64 = /Users/runner/hostedtoolcache/Java_Temurin-Hotspot_jdk/11.0.13-8/x64/Contents/Home/
ANDROID_NDK_LATEST_HOME = /Users/runner/Library/Android/sdk/ndk/23.1.7779620
HOMEBREW_NO_AUTO_UPDATE = 1
JAVA_HOME_17_X64 = /Users/runner/hostedtoolcache/Java_Temurin-Hotspot_jdk/17.0.1-12/x64/Contents/Home/
PATH = /usr/local/lib/ruby/gems/2.7.0/bin:/usr/local/opt/ruby@2.7/bin:/usr/local/opt/pipx_bin:/Users/runner/.cargo/bin:/usr/local/opt/curl/bin:/usr/local/bin:/usr/local/sbin:/Users/runner/bin:/Users/runner/.yarn/bin:/Users/runner/Library/Android/sdk/tools:/Users/runner/Library/Android/sdk/platform-tools:/Users/runner/Library/Android/sdk/ndk-bundle:/Library/Frameworks/Mono.framework/Versions/Current/Commands:/usr/bin:/bin:/usr/sbin:/sbin:/Users/runner/.dotnet/tools:/Users/runner/.ghcup/bin:/Users/runner/hostedtoolcache/stack/2.7.3/x64
JAVA_HOME = /Users/runner/hostedtoolcache/Java_Temurin-Hotspot_jdk/8.0.312-7/x64/Contents/Home/
XPC_FLAGS = 0x0
PIPX_HOME = /usr/local/opt/pipx
HOME = /Users/runner
HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS = 3650
HOMEBREW_CASK_OPTS = --no-quarantine
ANDROID_NDK_HOME = /Users/runner/Library/Android/sdk/ndk-bundle
GITHUB_PATH = /Users/runner/work/_temp/_runner_file_commands/add_path_a6754f69-63af-4471-8bee-05994f198ed7
TERM = xterm-color
[ Info: Julia executable: /Applications/Julia-1.6.app/Contents/Resources/julia/bin/julia
[ Info: Trying to import PyCall...
┌ Info: PyCall is already installed and compatible with Python executable.
│
│ PyCall:
│ python: /usr/local/opt/python@3.8/bin/python3
│ libpython: /usr/local/Cellar/python@3.8/3.8.12_1/Frameworks/Python.framework/Versions/3.8/Python
│ Python:
│ python: /usr/local/opt/python@3.8/bin/python3.8
└ libpython: /usr/local/Cellar/python@3.8/3.8.12_1/Frameworks/Python.framework/Versions/3.8/Python
%%file julia_set.jl
function escapetime(z, c, lim, maxit)
for n = 1:maxit
if abs(z) > lim
return n-1
end
z = z*z + c
end
return maxit
end
function juliaset_julia(x :: Vector{Float64}, y :: Vector{Float64},
c :: Complex, lim , maxit )
nx = length(x)
ny = length(y)
julia = zeros(Float64, (nx, ny))
Threads.@sync for i in eachindex(x)
Threads.@spawn for j in eachindex(y)
@inbounds z = x[i] + 1im * y[j]
@inbounds julia[j, i] = escapetime(z, c, lim, maxit)
end
end
return julia
end
Writing julia_set.jl
from julia import Main
juliaset_julia = Main.include("julia_set.jl")
plot_julia_set(juliaset_julia(x, y, c, lim, maxit))
Pythran¶
Pythran is a Python-to-C++ translator
Add a comment line before your python function and it runs much faster.
Configuration¶
~/.pythranrc
file on macos (gcc is installed with hombrew and pythran with pip)
[compiler]
include_dirs=/usr/local/opt/openblas/include
library_dirs=/usr/local/opt/openblas/lib
blas=openblas
CXX=g++-10
CC=gcc-10
%load_ext pythran.magic
%%pythran -fopenmp
import numpy as np
#pythran export juliaset_pythran(float64[], float64[],complex, int, int)
def juliaset_pythran(x, y, c, lim, maxit):
"""
returns Julia set
"""
juliap = np.zeros((x.size, y.size), dtype=np.int32)
#omp parallel for private(z, ite)
for j in range(y.size):
for i in range(x.size):
z = x[i] + 1j*y[j]
ite = 0
while abs(z) < lim and ite < maxit:
z = z**2 + c
ite += 1
juliap[j, i] = ite
return juliap
plot_julia_set(juliaset_pythran(x, y, c, lim, maxit))
Fortran¶
You need numpy and fortran-magic
%load_ext fortranmagic
On my mac i need to change compilers install with homebrew
Complex computation inside the loop are avoided on purpose. It takes time even with fortran.
%%fortran
subroutine juliaset_fortran(x, y, c, lim, maxit, julia)
real(8), intent(in) :: x(:)
real(8), intent(in) :: y(:)
complex(8), intent(in) :: c
real(8), intent(in) :: lim
integer, intent(in) :: maxit
integer, intent(out) :: julia(size(x),size(y))
real(8) :: zr, zi, limsq, cr, ci, tmp
integer :: ite, nx, ny
nx = size(x)
ny = size(y)
limsq = lim * lim
cr = real(c)
ci = imag(c)
do i = 1, nx
do j = 1, ny
zr = x(i)
zi = y(j)
ite = 0
do while (zr*zr+zi*zi < limsq .and. ite < maxit)
tmp = zr*zr - zi*zi
zi = 2*zr*zi + ci
zr = tmp + cr
ite = ite + 1
end do
julia(j, i) = ite
end do
end do
end subroutine juliaset_fortran
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/ipykernel_23346/2650996058.py in <module>
----> 1 get_ipython().run_cell_magic('fortran', '', 'subroutine juliaset_fortran(x, y, c, lim, maxit, julia)\n\n real(8), intent(in) :: x(:)\n real(8), intent(in) :: y(:)\n complex(8), intent(in) :: c\n real(8), intent(in) :: lim\n integer, intent(in) :: maxit\n integer, intent(out) :: julia(size(x),size(y))\n\n real(8) :: zr, zi, limsq, cr, ci, tmp\n integer :: ite, nx, ny\n\n nx = size(x)\n ny = size(y)\n limsq = lim * lim\n cr = real(c)\n ci = imag(c)\n\n do i = 1, nx\n do j = 1, ny \n zr = x(i)\n zi = y(j)\n ite = 0\n do while (zr*zr+zi*zi < limsq .and. ite < maxit)\n tmp = zr*zr - zi*zi \n zi = 2*zr*zi + ci\n zr = tmp + cr\n ite = ite + 1\n end do\n julia(j, i) = ite\n end do\n end do\n\n\nend subroutine juliaset_fortran\n')
/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2417 with self.builtin_trap:
2418 args = (magic_arg_s, cell)
-> 2419 result = fn(*args, **kwargs)
2420 return result
2421
/usr/local/lib/python3.8/site-packages/decorator.py in fun(*args, **kw)
230 if not kwsyntax:
231 args, kw = fix(args, kw, sig)
--> 232 return caller(func, *(extras + args), **kw)
233 fun.__name__ = func.__name__
234 fun.__doc__ = func.__doc__
/usr/local/lib/python3.8/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
/usr/local/lib/python3.8/site-packages/fortranmagic.py in fortran(self, line, cell)
377 verbosity=args.verbosity)
378 if res != 0:
--> 379 raise RuntimeError("f2py failed, see output")
380
381 self._code_cache[key] = module_name
RuntimeError: f2py failed, see output
plot_julia_set(juliaset_fortran(x, y, c, lim, maxit))
Fortran with OpenMP¶
On Linux you don’t need to add -L/usr/local/lib
This is the same function from above with some openmp directives
%%fortran --f90flags "-fopenmp" --opt "-O3" --extra "-lgomp"
subroutine juliaset_fortran_omp(x, y, c, lim, maxit, julia)
real(8), intent(in) :: x(:)
real(8), intent(in) :: y(:)
complex(8), intent(in) :: c
real(8), intent(in) :: lim
integer, intent(in) :: maxit
integer, intent(out) :: julia(size(x),size(y))
real(8) :: zr, zi, limsq, cr, ci, tmp
integer :: ite, nx, ny
nx = size(x)
ny = size(y)
limsq = lim * lim
cr = real(c)
ci = imag(c)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP FIRSTPRIVATE(nx,ny,x,y,c,limsq,maxit,cr,ci) &
!$OMP PRIVATE(i,j,ite,zr,zi, tmp) &
!$OMP SHARED(julia)
!$OMP DO SCHEDULE(DYNAMIC)
do i = 1, nx
do j = 1, ny
zr = x(i)
zi = y(j)
ite = 0
do while (zr*zr+zi*zi < limsq .and. ite < maxit)
tmp = zr*zr - zi*zi
zi = 2*zr*zi + ci
zr = tmp + cr
ite = ite + 1
end do
julia(j, i) = ite
end do
end do
!$OMP END PARALLEL
end subroutine juliaset_fortran_omp
plot_julia_set(juliaset_fortran_omp(x, y, c, lim, maxit))
Numpy¶
Vectorize version with numpy. You could get some overflow warnings
import itertools
def juliaset_numpy(x, y, c, lim, maxit):
julia = np.zeros((x.size, y.size), dtype=np.int32)
zx = x[np.newaxis, :]
zy = y[:, np.newaxis]
z = zx + zy*1j
for ite in itertools.count():
z = z**2 + c
mask = np.logical_not(julia) & (np.abs(z) >= lim)
julia[mask] = ite
if np.all(julia) or ite > maxit:
return julia
plot_julia_set(juliaset_numpy(x, y, c, lim, maxit))
Cython¶
Install Cython
Use %%cython -a
to highlight the Python-C and C-Python conversions.
Cython is not Python and not C, it is another language :-)
%load_ext cython
%%cython
import numpy as np
import cython
@cython.boundscheck(False)
@cython.wraparound(False)
def juliaset_cython(double [:] x, double [:] y, double complex c, double lim, int maxit):
cdef:
int [:, ::1] julia = np.zeros((x.size, y.size), dtype = np.int32)
double tmp, zr, zi, lim2 = lim*lim
double cr = c.real, ci = c.imag
int ite, i, j, nx=x.size, ny=y.size
for i in range(nx):
for j in range(ny):
zr = x[i]
zi = y[j]
ite = 0
while (zr*zr + zi*zi) < lim2 and ite < maxit:
zr, zi = zr*zr - zi*zi + cr, 2*zr*zi + ci
ite += 1
julia[j, i] = ite
return julia
plot_julia_set(juliaset_cython(x, y, c, lim, maxit))
As f2py we can use openmp with the Cython prange
function
%%cython -f -c-fopenmp --link-args=-fopenmp
import numpy as np
import cython
from cython.parallel import prange
from libc.stdlib cimport malloc, free
@cython.boundscheck(False)
@cython.wraparound(False)
def juliaset_cython_omp(double [:] x, double [:] y, double complex c, double lim, int maxit):
cdef:
int [:, ::1] julia = np.zeros((x.size, y.size), dtype = np.int32)
double tmp, zr, zi, lim2 = lim*lim
double cr = c.real, ci = c.imag
int i, j, nx=x.size, ny=y.size
int *ite
for j in prange(ny, nogil=True, schedule='dynamic'):
ite = <int *> malloc(sizeof(int))
for i in range(nx):
zr = x[i]
zi = y[j]
ite[0] = 0
while (zr*zr + zi*zi) < lim2 and ite[0] < maxit:
zr, zi = zr*zr - zi*zi + cr, 2*zr*zi + ci
ite[0] += 1
julia[j, i] = ite[0]
free(ite)
return julia
plot_julia_set(juliaset_cython_omp(x, y, c, lim, maxit))
Set number of threads used for parallel functions¶
%env OMP_NUM_THREADS=4
%env JULIA_NUM_THREADS=4
env: OMP_NUM_THREADS=4
env: JULIA_NUM_THREADS=4
import pandas as pd
from collections import defaultdict
from tqdm import tqdm_notebook as tqdm
results = defaultdict(list)
nx, ny = 1024, 1024 # increase mesh size
x = np.linspace(-1.6, 1.6, nx)
y = np.linspace(-1.6, 1.6, ny)
functions = [juliaset_numpy,
juliaset_fortran,
juliaset_fortran_omp,
juliaset_cython,
juliaset_cython_omp,
juliaset_numba,
juliaset_pythran,
juliaset_julia]
for f in tqdm(functions):
_ = %timeit -oq -n 1 f(x, y, c, lim, maxit)
results['etime'] += [_.best]
results = pd.DataFrame(results, index=list(map(lambda f:f.__name__[9:],functions)))
results["speed_up"] = [results.etime["numpy"]/t for t in results.etime]
results.sort_values(by="speed_up",axis=0)
etime | speed_up | |
---|---|---|
numpy | 6.920963 | 1.000000 |
julia | 0.083905 | 82.486083 |
numba | 0.073950 | 93.589887 |
fortran | 0.058941 | 117.422458 |
cython | 0.056860 | 121.719685 |
pythran | 0.043657 | 158.531005 |
fortran_omp | 0.016614 | 416.571950 |
cython_omp | 0.016206 | 427.060584 |