Julia Set

In the notebook I present different way to accelerate python code. This is a modified version from Loic Gouarin

The test case is the computation of the Julia set wikipedia

import os, sys

if sys.platform == 'darwin':
    os.environ['CC'] = 'gcc-10'
    os.environ['CXX'] = 'g++-10'
import warnings
warnings.filterwarnings('ignore')
%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

plt.rcParams['figure.figsize'] = (6,6)

Initialize Mesh

nx, ny = 512, 512 # mesh
lim, maxit = 400, 2000 # limits 
vmin, vmax = 0, 200 

x = np.linspace(-1.6, 1.6, nx)
y = np.linspace(-1.6, 1.6, ny)
c = -0.772691322542185 + 0.124281466072787j

Pure Python

def juliaset_python(x, y, c, lim, maxit):
    """ 
    returns Julia set
    """
    julia = np.zeros((x.size, y.size))

    for i in range(x.size):
        for j in range(y.size):
            z = x[i] + 1j*y[j]
            ite = 0
            while abs(z) < lim and ite < maxit:
                z = z**2 + c
                ite += 1
            julia[j, i] = ite

    return julia
def plot_julia_set(julia):
    plt.figure(figsize=(6,6))
    plt.imshow(julia, cmap = cm.Greys, vmin=vmin, vmax=vmax)
plot_julia_set(juliaset_python(x, y, c, lim, maxit))
_images/03.julia-set_10_0.png

numba

Numba will accelerate the pure python function just with the decorator @jit. Numba does everything for you.

from numba import jit

@jit(nopython=True, parallel=True)
def juliaset_numba(x, y, c, lim, maxit):
    julia = np.zeros((x.size, y.size))
    lim2 = lim*lim
    
    c = complex(c)  # needed for numba
    for j in range(y.size):
        for i in range(x.size):

            z = complex(x[i], y[j])
            ite = 0
            while (z.real*z.real + z.imag*z.imag) < lim2 and ite < maxit:
                z = z*z + c
                ite += 1
            julia[j, i] = ite

    return julia
plot_julia_set(juliaset_numba(x, y, c, lim, maxit))
_images/03.julia-set_13_0.png

PyJulia

PyJulia is a python module to import Julia function in your Python session. You can also run Julia code in a middle of a Jupyter notebook with a Python kernel.

To use pyjulia you need to install Julia and install PyCall.jl and REPL

julia> using Pkg
julia> ENV["PYTHON"] = "/usr/local/bin/python3.7"
julia> Pkg.add("PyCall")
julia> Pkg.add("REPL")
julia> Pkg.build("PyCall")

print the value of sys.executable to know the python path. But the cell above could do the job.

import julia
julia.install()
from julia.api import Julia
jl = Julia(compiled_modules=False)
[ Info: Julia version info
Julia Version 1.6.4
Commit 35f0c911f4 (2021-11-19 03:54 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin19.5.0)
  uname: Darwin 20.6.0 Darwin Kernel Version 20.6.0: Tue Oct 12 18:33:42 PDT 2021; root:xnu-7195.141.8~1/RELEASE_X86_64 x86_64 i386
  CPU: Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz: 
              speed         user         nice          sys         idle          irq
       #1  3337 MHz       2127 s          0 s       1850 s       4307 s          0 s
       #2  3337 MHz       2330 s          0 s       1664 s       4283 s          0 s
       #3  3337 MHz       2052 s          0 s       1424 s       4802 s          0 s
       
  Memory: 14.0 GB (4774.51171875 MB free)
  Uptime: 828.0 sec
  Load Avg: 
 2.8427734375  3.46484375  3.21826171875
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-11.0.1 (ORCJIT, ivybridge)
Environment:
  GITHUB_EVENT_PATH = /Users/runner/work/_temp/_github_workflow/event.json
  ANDROID_HOME = /Users/runner/Library/Android/sdk
  NVM_CD_FLAGS = 
  JAVA_HOME_8_X64 = /Users/runner/hostedtoolcache/Java_Temurin-Hotspot_jdk/8.0.312-7/x64/Contents/Home/
  NUNIT_BASE_PATH = /Library/Developer/nunit
  NUNIT3_PATH = /Library/Developer/nunit/3.6.0
  JAVA_HOME_11_X64 = /Users/runner/hostedtoolcache/Java_Temurin-Hotspot_jdk/11.0.13-8/x64/Contents/Home/
  ANDROID_NDK_LATEST_HOME = /Users/runner/Library/Android/sdk/ndk/23.1.7779620
  HOMEBREW_NO_AUTO_UPDATE = 1
  JAVA_HOME_17_X64 = /Users/runner/hostedtoolcache/Java_Temurin-Hotspot_jdk/17.0.1-12/x64/Contents/Home/
  PATH = /usr/local/lib/ruby/gems/2.7.0/bin:/usr/local/opt/ruby@2.7/bin:/usr/local/opt/pipx_bin:/Users/runner/.cargo/bin:/usr/local/opt/curl/bin:/usr/local/bin:/usr/local/sbin:/Users/runner/bin:/Users/runner/.yarn/bin:/Users/runner/Library/Android/sdk/tools:/Users/runner/Library/Android/sdk/platform-tools:/Users/runner/Library/Android/sdk/ndk-bundle:/Library/Frameworks/Mono.framework/Versions/Current/Commands:/usr/bin:/bin:/usr/sbin:/sbin:/Users/runner/.dotnet/tools:/Users/runner/.ghcup/bin:/Users/runner/hostedtoolcache/stack/2.7.3/x64
  JAVA_HOME = /Users/runner/hostedtoolcache/Java_Temurin-Hotspot_jdk/8.0.312-7/x64/Contents/Home/
  XPC_FLAGS = 0x0
  PIPX_HOME = /usr/local/opt/pipx
  HOME = /Users/runner
  HOMEBREW_CLEANUP_PERIODIC_FULL_DAYS = 3650
  HOMEBREW_CASK_OPTS = --no-quarantine
  ANDROID_NDK_HOME = /Users/runner/Library/Android/sdk/ndk-bundle
  GITHUB_PATH = /Users/runner/work/_temp/_runner_file_commands/add_path_a6754f69-63af-4471-8bee-05994f198ed7
  TERM = xterm-color
[ Info: Julia executable: /Applications/Julia-1.6.app/Contents/Resources/julia/bin/julia
[ Info: Trying to import PyCall...
┌ Info: PyCall is already installed and compatible with Python executable.
│ 
│ PyCall:
│     python: /usr/local/opt/python@3.8/bin/python3
│     libpython: /usr/local/Cellar/python@3.8/3.8.12_1/Frameworks/Python.framework/Versions/3.8/Python
│ Python:
│     python: /usr/local/opt/python@3.8/bin/python3.8
└     libpython: /usr/local/Cellar/python@3.8/3.8.12_1/Frameworks/Python.framework/Versions/3.8/Python
%%file julia_set.jl

function escapetime(z, c, lim, maxit)

    for n = 1:maxit
        if abs(z) > lim
            return n-1
        end
        z = z*z + c
    end
    return maxit
end

function juliaset_julia(x :: Vector{Float64}, y :: Vector{Float64}, 
                        c :: Complex, lim , maxit )
    
    nx = length(x)
    ny = length(y)
    julia = zeros(Float64, (nx, ny))
    Threads.@sync for i in eachindex(x)
        Threads.@spawn for j in eachindex(y)
            @inbounds z  = x[i] + 1im * y[j] 
            @inbounds julia[j, i] = escapetime(z, c, lim, maxit)
        end
    end
    return julia
end
Writing julia_set.jl
from julia import Main
juliaset_julia = Main.include("julia_set.jl")
plot_julia_set(juliaset_julia(x, y, c, lim, maxit))
_images/03.julia-set_18_0.png

Pythran

Pythran is a Python-to-C++ translator

Add a comment line before your python function and it runs much faster.

Configuration

~/.pythranrc file on macos (gcc is installed with hombrew and pythran with pip)

[compiler]
include_dirs=/usr/local/opt/openblas/include
library_dirs=/usr/local/opt/openblas/lib
blas=openblas
CXX=g++-10
CC=gcc-10
%load_ext pythran.magic
%%pythran -fopenmp

import numpy as np

#pythran export juliaset_pythran(float64[], float64[],complex, int, int)
def juliaset_pythran(x, y, c, lim, maxit):
    """ 
    returns Julia set
    """
    juliap = np.zeros((x.size, y.size), dtype=np.int32)

    #omp parallel for private(z, ite)
    for j in range(y.size):
        for i in range(x.size):
            z = x[i] + 1j*y[j]
            ite = 0
            while abs(z) < lim and ite < maxit:
                z = z**2 + c
                ite += 1
            juliap[j, i] = ite

    return juliap
plot_julia_set(juliaset_pythran(x, y, c, lim, maxit))
_images/03.julia-set_22_0.png

Fortran

You need numpy and fortran-magic

%load_ext fortranmagic

On my mac i need to change compilers install with homebrew

Complex computation inside the loop are avoided on purpose. It takes time even with fortran.

%%fortran
subroutine juliaset_fortran(x, y, c, lim, maxit, julia)

    real(8),    intent(in)  :: x(:)
    real(8),    intent(in)  :: y(:)
    complex(8), intent(in)  :: c
    real(8),    intent(in)  :: lim
    integer,    intent(in)  :: maxit
    integer,    intent(out) :: julia(size(x),size(y))

    real(8)    :: zr, zi, limsq, cr, ci, tmp
    integer    :: ite, nx, ny

    nx = size(x)
    ny = size(y)
    limsq = lim * lim
    cr = real(c)
    ci = imag(c)

    do i = 1, nx
       do j = 1, ny   
            zr = x(i)
            zi = y(j)
            ite = 0
            do while (zr*zr+zi*zi < limsq .and. ite < maxit)
                tmp = zr*zr - zi*zi 
                zi = 2*zr*zi + ci
                zr = tmp + cr
                ite = ite + 1
            end do
            julia(j, i) = ite
        end do
    end do


end subroutine juliaset_fortran
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/ipykernel_23346/2650996058.py in <module>
----> 1 get_ipython().run_cell_magic('fortran', '', 'subroutine juliaset_fortran(x, y, c, lim, maxit, julia)\n\n    real(8),    intent(in)  :: x(:)\n    real(8),    intent(in)  :: y(:)\n    complex(8), intent(in)  :: c\n    real(8),    intent(in)  :: lim\n    integer,    intent(in)  :: maxit\n    integer,    intent(out) :: julia(size(x),size(y))\n\n    real(8)    :: zr, zi, limsq, cr, ci, tmp\n    integer    :: ite, nx, ny\n\n    nx = size(x)\n    ny = size(y)\n    limsq = lim * lim\n    cr = real(c)\n    ci = imag(c)\n\n    do i = 1, nx\n       do j = 1, ny   \n            zr = x(i)\n            zi = y(j)\n            ite = 0\n            do while (zr*zr+zi*zi < limsq .and. ite < maxit)\n                tmp = zr*zr - zi*zi \n                zi = 2*zr*zi + ci\n                zr = tmp + cr\n                ite = ite + 1\n            end do\n            julia(j, i) = ite\n        end do\n    end do\n\n\nend subroutine juliaset_fortran\n')

/usr/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
   2417             with self.builtin_trap:
   2418                 args = (magic_arg_s, cell)
-> 2419                 result = fn(*args, **kwargs)
   2420             return result
   2421 

/usr/local/lib/python3.8/site-packages/decorator.py in fun(*args, **kw)
    230             if not kwsyntax:
    231                 args, kw = fix(args, kw, sig)
--> 232             return caller(func, *(extras + args), **kw)
    233     fun.__name__ = func.__name__
    234     fun.__doc__ = func.__doc__

/usr/local/lib/python3.8/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
    185     # but it's overkill for just that one bit of state.
    186     def magic_deco(arg):
--> 187         call = lambda f, *a, **k: f(*a, **k)
    188 
    189         if callable(arg):

/usr/local/lib/python3.8/site-packages/fortranmagic.py in fortran(self, line, cell)
    377                              verbosity=args.verbosity)
    378         if res != 0:
--> 379            raise RuntimeError("f2py failed, see output")
    380 
    381         self._code_cache[key] = module_name

RuntimeError: f2py failed, see output
plot_julia_set(juliaset_fortran(x, y, c, lim, maxit))
_images/03.julia-set_28_0.png

Fortran with OpenMP

On Linux you don’t need to add -L/usr/local/lib

This is the same function from above with some openmp directives

%%fortran --f90flags "-fopenmp" --opt "-O3" --extra "-lgomp"
subroutine juliaset_fortran_omp(x, y, c, lim, maxit, julia)

    real(8),    intent(in)  :: x(:)
    real(8),    intent(in)  :: y(:)
    complex(8), intent(in)  :: c
    real(8),    intent(in)  :: lim
    integer,    intent(in)  :: maxit
    integer,    intent(out) :: julia(size(x),size(y))

    real(8)    :: zr, zi, limsq, cr, ci, tmp
    integer    :: ite, nx, ny

    nx = size(x)
    ny = size(y)
    limsq = lim * lim
    cr = real(c)
    ci = imag(c)

    !$OMP PARALLEL DEFAULT(NONE) &
    !$OMP FIRSTPRIVATE(nx,ny,x,y,c,limsq,maxit,cr,ci) &
    !$OMP PRIVATE(i,j,ite,zr,zi, tmp) &
    !$OMP SHARED(julia)
    !$OMP DO SCHEDULE(DYNAMIC)
    do i = 1, nx
       do j = 1, ny   
            zr = x(i)
            zi = y(j)
            ite = 0
            do while (zr*zr+zi*zi < limsq .and. ite < maxit)
                tmp = zr*zr - zi*zi 
                zi = 2*zr*zi + ci
                zr = tmp + cr
                ite = ite + 1
            end do
            julia(j, i) = ite
        end do
    end do  
    !$OMP END PARALLEL


end subroutine juliaset_fortran_omp
plot_julia_set(juliaset_fortran_omp(x, y, c, lim, maxit))
_images/03.julia-set_31_0.png

Numpy

Vectorize version with numpy. You could get some overflow warnings

import itertools

def juliaset_numpy(x, y, c, lim, maxit):
    julia = np.zeros((x.size, y.size), dtype=np.int32)

    zx = x[np.newaxis, :]
    zy = y[:, np.newaxis]
    
    z = zx + zy*1j
    
    for ite in itertools.count():
        
        z = z**2 + c 
        mask = np.logical_not(julia) & (np.abs(z) >= lim)
        julia[mask] = ite
        if np.all(julia) or ite > maxit:
            return julia
plot_julia_set(juliaset_numpy(x, y, c, lim, maxit))
_images/03.julia-set_34_0.png

Cython

Install Cython

Use %%cython -a to highlight the Python-C and C-Python conversions.

Cython is not Python and not C, it is another language :-)

%load_ext cython
%%cython
import numpy as np
import cython

@cython.boundscheck(False)
@cython.wraparound(False)
def juliaset_cython(double [:] x, double [:] y, double complex c, double lim, int maxit):
    cdef:
        int [:, ::1] julia = np.zeros((x.size, y.size), dtype = np.int32)
        double tmp, zr, zi, lim2 = lim*lim
        double cr = c.real, ci = c.imag
        int ite, i, j, nx=x.size, ny=y.size

    for i in range(nx):
        for j in range(ny):
            zr = x[i] 
            zi = y[j]
            ite = 0
            while (zr*zr + zi*zi) < lim2 and ite < maxit:
                zr, zi = zr*zr - zi*zi + cr, 2*zr*zi + ci
                ite += 1
            julia[j, i] = ite

    return julia
plot_julia_set(juliaset_cython(x, y, c, lim, maxit))
_images/03.julia-set_38_0.png

As f2py we can use openmp with the Cython prange function

%%cython -f -c-fopenmp --link-args=-fopenmp
import numpy as np
import cython
from cython.parallel import prange
from libc.stdlib cimport malloc, free 

@cython.boundscheck(False)
@cython.wraparound(False)
def juliaset_cython_omp(double [:] x, double [:] y, double complex c, double lim, int maxit):
    cdef:
        int [:, ::1] julia = np.zeros((x.size, y.size), dtype = np.int32)
        double tmp, zr, zi, lim2 = lim*lim
        double cr = c.real, ci = c.imag
        int  i, j, nx=x.size, ny=y.size
        int *ite

    for j in prange(ny, nogil=True, schedule='dynamic'):
        ite = <int *> malloc(sizeof(int))
        for i in range(nx):
            zr = x[i] 
            zi = y[j]
            ite[0] = 0
            while (zr*zr + zi*zi) < lim2 and ite[0] < maxit:
                zr, zi = zr*zr - zi*zi + cr, 2*zr*zi + ci
                ite[0] += 1
            julia[j, i] = ite[0]
        free(ite)
        
    return julia
plot_julia_set(juliaset_cython_omp(x, y, c, lim, maxit))
_images/03.julia-set_41_0.png

Set number of threads used for parallel functions

%env OMP_NUM_THREADS=4
%env JULIA_NUM_THREADS=4
env: OMP_NUM_THREADS=4
env: JULIA_NUM_THREADS=4
import pandas as pd
from collections import defaultdict
from tqdm import tqdm_notebook as tqdm
results = defaultdict(list)

nx, ny = 1024, 1024 # increase mesh size

x = np.linspace(-1.6, 1.6, nx)
y = np.linspace(-1.6, 1.6, ny)

functions = [juliaset_numpy,
             juliaset_fortran,
             juliaset_fortran_omp,
             juliaset_cython,
             juliaset_cython_omp,
             juliaset_numba,
             juliaset_pythran,
             juliaset_julia]

for f in tqdm(functions):

    _ = %timeit -oq -n 1 f(x, y, c, lim, maxit)
    results['etime'] += [_.best]
    
results = pd.DataFrame(results, index=list(map(lambda f:f.__name__[9:],functions)))
results["speed_up"] = [results.etime["numpy"]/t for t in results.etime]
results.sort_values(by="speed_up",axis=0)

etime speed_up
numpy 6.920963 1.000000
julia 0.083905 82.486083
numba 0.073950 93.589887
fortran 0.058941 117.422458
cython 0.056860 121.719685
pythran 0.043657 158.531005
fortran_omp 0.016614 416.571950
cython_omp 0.016206 427.060584