Performance annotations

"""
    new_sum(myvec::Vector{Int})

Return the sum of the elements of `myvec`, accumulated with an explicit index loop.

No `@inbounds` annotation is used here, so indexing keeps Julia's default bounds
checking; this is the baseline that `new_sum_inbounds` is benchmarked against.
An empty vector yields `0`.
"""
function new_sum(myvec::Vector{Int})
    total = zero(Int)
    for idx in eachindex(myvec)
        total = total + myvec[idx]
    end
    return total
end

"""
    new_sum_inbounds(myvec::Vector{Int})

Return the sum of the elements of `myvec`, with the element access marked `@inbounds`.

The indices come from `eachindex(myvec)`, so every access is valid by construction
and skipping the bounds check is safe. An empty vector yields `0`.
"""
function new_sum_inbounds(myvec::Vector{Int})
    total = zero(Int)
    for idx in eachindex(myvec)
        @inbounds total = total + myvec[idx]
    end
    return total
end
new_sum_inbounds (generic function with 1 method)
using BenchmarkTools

# Benchmark input: 1:1000000 materialized with `collect`, because both functions
# under test are declared for `Vector{Int}` arguments.
myvec = collect(1:1000000)
# `$myvec` interpolates the vector into the benchmarked expression (see the
# BenchmarkTools manual on interpolating global variables).
@btime new_sum($myvec)
@btime new_sum_inbounds($myvec)
  97.431 μs (0 allocations: 0 bytes)
  128.479 μs (0 allocations: 0 bytes)
500000500000

"""
    inner(x, y)

Return the sum of the element-wise products `x[i] * y[i]` over the shared indices
of `x` and `y` (a plain dot product without complex conjugation).

The accumulator starts as `zero(eltype(x))`. `eachindex(x, y)` throws a
`DimensionMismatch` when the two arrays do not have matching indices, which is what
makes the `@inbounds` access safe. `@noinline` asks the compiler not to inline this
function into its callers.
"""
@noinline function inner(x, y)
    acc = zero(eltype(x))
    for k in eachindex(x, y)
        @inbounds acc = acc + x[k] * y[k]
    end
    return acc
end;
"""
    innersimd(x, y)

Return the sum of the element-wise products `x[i] * y[i]` over the shared indices
of `x` and `y`, with the loop annotated `@simd`.

Same computation as `inner`, but `@simd` permits the compiler to reorder the
floating-point accumulation, so results may differ from `inner` in the last bits.
The accumulator starts as `zero(eltype(x))`; `eachindex(x, y)` throws a
`DimensionMismatch` when the arrays' indices do not match. `@noinline` asks the
compiler not to inline this function into its callers.
"""
@noinline function innersimd(x, y)
    acc = zero(eltype(x))
    @simd for k in eachindex(x, y)
        @inbounds acc = acc + x[k] * y[k]
    end
    return acc
end;

"""
    timeit(n, reps)

Time `reps` calls of `inner` and then `reps` calls of `innersimd` on two random
`Float32` vectors of length `n`, printing the achieved rate of each in GFlop/sec.

Each call is counted as `2n` floating-point operations (one multiply and one add
per element). Nothing is returned.
"""
function timeit(n, reps)
    a = rand(Float32, n)
    b = rand(Float32, n)
    # Results are accumulated into a local that is otherwise unused.
    sink = zero(Float64)
    flops = 2n * reps

    plain_seconds = @elapsed for _ in 1:reps
        sink += inner(a, b)
    end
    println("GFlop/sec        = ", flops / plain_seconds * 1E-9)

    simd_seconds = @elapsed for _ in 1:reps
        sink += innersimd(a, b)
    end
    println("GFlop/sec (SIMD) = ", flops / simd_seconds * 1E-9)
end
# A tiny run first, then the larger measurement (presumably the first call also
# absorbs JIT compilation of `timeit`, `inner` and `innersimd` — confirm).
timeit(10, 10)
timeit(1000, 1000)
GFlop/sec        = 0.7692307692307693
GFlop/sec (SIMD) = 0.711743772241993
GFlop/sec        = 2.0960493661546717
GFlop/sec (SIMD) = 30.39467485296576

"""
    init!(u::Vector)

Overwrite `u` in place with samples of a sine wave: `u[i] = sin(2π * dx * i)`,
where `dx = 1 / (length(u) - 1)`.

The loop is annotated with `@fastmath`, `@inbounds` and `@simd`. Returns `nothing`.
"""
function init!(u::Vector)
    npoints = length(u)
    dx = 1.0 / (npoints - 1)
    @fastmath @inbounds @simd for k in eachindex(u)
        u[k] = sin(2pi*dx*k)
    end
    return nothing
end

"""
    deriv!(u::Vector, du)

Write a finite-difference approximation of the derivative of `u` into `du[1:n]`,
where `n = length(u)` and the grid spacing is `dx = 1 / (n - 1)`.

The first and last entries use one-sided differences, `(u[2] - u[1]) / dx` and
`(u[n] - u[n-1]) / dx`; interior entries use the centered difference
`(u[i+1] - u[i-1]) / (2dx)`. All arithmetic is annotated `@fastmath`.

The value of the final expression (the last boundary derivative) is returned
incidentally; callers in this file ignore it.

# Throws
- `ArgumentError` if `u` has fewer than two elements (the stencil needs `u[2]`).
- `BoundsError` if `1:length(u)` is not a valid index range for `du`.
"""
function deriv!(u::Vector, du)
    n = length(u)
    n >= 2 || throw(ArgumentError("u must have at least 2 elements, got $n"))
    # Every access below is `@inbounds`, so the output buffer must be validated
    # once up front; otherwise a short `du` would be written out of bounds.
    checkbounds(du, 1:n)
    dx = 1.0 / (n-1)
    @fastmath @inbounds du[1] = (u[2] - u[1]) / dx
    @fastmath @inbounds @simd for i in 2:n-1
        du[i] = (u[i+1] - u[i-1]) / (2*dx)
    end
    @fastmath @inbounds du[n] = (u[n] - u[n-1]) / dx
end
deriv! (generic function with 1 method)

"""
    mynorm(u::Vector)

Return the Euclidean norm of `u`: the square root of the sum of squared elements.

The accumulator starts as `zero(eltype(u))`, so an empty vector gives the square
root of zero. The reduction loop is annotated with `@fastmath`, `@inbounds` and
`@simd`, and the final square root is taken under `@fastmath` as well.
"""
function mynorm(u::Vector)
    sumsq = zero(eltype(u))
    @fastmath @inbounds @simd for k in eachindex(u)
        sumsq += u[k]^2
    end
    return @fastmath sqrt(sumsq)
end
mynorm (generic function with 1 method)

"""
    main(n)

Run the derivative benchmark on a grid of `n` points.

Allocates a `Float64` vector of length `n`, fills it with `init!`, then times
10^6 repetitions of `deriv!` followed by `mynorm` using `@time`. The norm from the
last repetition is printed. Nothing is returned.
"""
function main(n)
    wave = Vector{Float64}(undef, n)
    init!(wave)
    slope = similar(wave)

    # One untimed pass before the measured loop (presumably so that compilation
    # is not included in the timing — confirm).
    deriv!(wave, slope)
    nu = mynorm(slope)

    @time for _ in 1:10^6
        deriv!(wave, slope)
        nu = mynorm(slope)
    end

    println(" nu = $nu ")
end

# Small run first, then the timed run on 2000 grid points (presumably the first
# call also absorbs JIT compilation of `main` and its callees — confirm).
main(10)
@time main(2000)
  0.023373 seconds
 nu = 13.187330309540112 
  0.450647 seconds
 nu = 198.74110382490193 
  0.450825 seconds (187 allocations: 44.430 KiB)

# Launch a separate Julia process on performance/wave.jl with `--math-mode=ieee`,
# which per the Julia command-line documentation overrides `@fastmath`.
# NOTE(review): presumably wave.jl contains the init!/deriv!/mynorm/main code
# shown above, so the timings are comparable — confirm.
run(`julia --math-mode=ieee performance/wave.jl`)
  0.032787 seconds
 nu = 13.187330309540112 
  0.871918 seconds
 nu = 198.74110382490198 
  0.872002 seconds (26 allocations: 33.352 KiB)
Process(`julia --math-mode=ieee performance/wave.jl`, ProcessExited(0))