pico-8 cartridge // http://www.pico-8.com
version 42
__lua__
-- prof: cpu cycle counter v1.4
--  BY PANCELOR
--[[------------------------


use this cart to precisely
measure code execution time

--------------------------------
         ★ overview ★
--------------------------------
| tab 0 | usage guide          |
| tab 1 | (internals)          |
| tab 2 | your code here       |
--------------------------------


-----------------------
-- ★ usage guide ★ --
-----------------------


웃: i have two code snippets;
    which one is faster?

🐱: edit the last tab with your
    snippets, then run the cart.
    it will tell you precisely
    how much cpu it takes to
    run each snippet.

    the results are also copied
    to your clipboard.


웃: what do the numbers mean?

🐱: the cpu cost is reported
    as lua and system cycle
    counts. look up stat(1)
    and stat(2) for more info.

    if you're not sure, just
    look at the first number.
    lower is faster (better)


웃: why "{locals={9}}"
    in the example?

🐱: accessing local variables
    is faster than global vars.

    so if your test involves
    local variables, simulate
    this by passing them in:

      prof(function(a)
        sqrt(a)
      end,{ locals={9} })

    /!\     /!\     /!\     /!\
    local values from outside
    the current scope are also
    slower to access! example:

      global = 4
      local outer = 4
      prof(function(x)
        local _ = x --fast
      end,function(x)
        local _ = outer --slow
      end,function(x)
        local _ = global --slow
      end,{ locals={4} })
    /!\     /!\     /!\     /!\


웃: can i do "prof(myfunc)"?

🐱: no, this sometimes gives
    wrong results! always use
    inline functions:

      prof(function()
        --code for myfunc here
      end)

    as an example, "prof(sin)"
    measures "-2" -- wrong! (the
    cart stops with an error
    instead of reporting this.)
    "prof(function()sin()end)"
    correctly reports "4"

    (see the technical notes at
    the start of the next tab
    for a brief explanation.
    technically, "prof(myfunc)"
    will work if myfunc was made
    by the user, but you will
    risk confusing yourself)


---------------
 ★ method 2 ★
---------------


this cart is based on
code by samhocevar:
https://www.lexaloffle.com/bbs/?pid=60198#p

if you use this method, be very
careful with local/global vars.
it's very easy to accidentally
measure the wrong thing.

here's an example of how to
measure cycles (ignoring this
cart and using the old method)

  function _init()
    local a=11.2 -- locals

    local n=1024
    flip()
    local tot1,sys1=stat(1),stat(2)
    for i=1,n do   end --calibrate
    local tot2,sys2=stat(1),stat(2)
    for i=1,n do local _=sqrt(a) end --measure
    local tot3,sys3=stat(1),stat(2)

    function cyc(t0,t1,t2) return ((t2-t1)-(t1-t0))*128/n*256/stat(8)*256 end
    local lua = cyc(tot1-sys1,tot2-sys2,tot3-sys3)
    local sys = cyc(sys1,sys2,sys3)
    print(lua.."+"..sys.."="..(lua+sys).." (lua+sys)")
  end

run this once, see the results,
then change the "measure" line
to some other code you want
to measure.

note: wrapping the code inside
"_init()" is required, otherwise
builtin functions like "sin"
will be measured wrong.
(the reason is explained at
the start of the next tab)


---------------
 ★ method 3 ★
---------------


another way to measure cpu cost
is to run something like this:

  function _draw()
    cls(1)
    local x=9
    for i=1,1000 do
      local a=sqrt(x) --snippet1
  --    local b=x^0.5 --snippet2
    end
  end

while running, press ctrl-p to
see the performance monitor.
the middle number shows how much
of cpu is being used, as a
fraction. (0.60 = 60% used)

now, change the comments on the
two code snippets inside _draw()
and re-run. compare the new
result with the old to determine
which snippet is faster.

note: every loop iteration costs
an additional 2 cycles, so the
ratio of the two fractions will
not match the ratio of the
execution times of the snippets.
but this method can quickly tell
you which snippet is faster.


]]

-->8
--[[ profiler.lua
more info: https://www.lexaloffle.com/bbs/?tid=46117

usage:
  prof(function()
    memcpy(0,0x200,64)
  end,function()
    poke4(0,peek4(0x200,16))
  end)

passing locals:
  prof(
    function(a,b)
      local c=(a+1)*(b+1)-1
    end,
    function(a,b)
      local c=a*b+a+b
    end,
    {locals={3,5}}
  )

getting global/local variables exactly right
is very tricky; you should always use inline
functions like above; if you try e.g. prof(sin)
the results will be wrong.


# minutiae / notes to self:
---------------------------
doing this at top-level is awkward:
  for _=1,n do       end -- calibrate
  for _=1,n do sin() end -- measure
b/c sin is secretly local at top-level,
so it gives a misleading result (3 cycles).
do it inside _init instead for a
more representative result (4 cycles).

## separate issue:
------------------
if you call prof(sin), it gives the wrong result (-2 cycles) because
it's comparing sin() against noop() (not truly nothing).
but we want the noop() there for normal inline prof() calls,
to avoid measuring the cost of the indirection
(calling func() from inside prof() is irrelevant to
how cpu-expensive func()'s body is)
]]

-- prof(fn1,fn2,...,fnN,[opts])
--
-- measures every function with
-- prof_one() and builds a report
-- with one line per function:
--   "total (x lua, y sys)"
-- the report is copied to the
-- clipboard, then the screen is
-- cleared and the report is
-- passed to stop().
--
-- fn1..fnN: functions to measure
-- opts: optional table; must be
--   the last argument
--   opts.locals: values to pass
--   opts.name: text label
--   opts.n: number of iterations
function prof(...)
  local funcs={...}
  -- a trailing table is the
  -- options, not a function
  local opts=type(funcs[#funcs])=="table" and deli(funcs) or {}

  -- build output string
  local msg=""
  local function log(s)
    msg..=s.."\n"
  end

  if opts.name then
    log("prof: "..opts.name)
  end
  for fn in all(funcs) do
    local dat=prof_one(fn,opts)
    -- right-align short totals
    -- to 3 chars. only pad,
    -- never cut: sub(..,-3) on
    -- its own would print "1234"
    -- as "234"
    local total=tostr(dat.total)
    if #total<3 then
      total=sub("  "..total,-3)
    end
    log(total
      .." ("
      ..dat.lua
      .." lua, "
      ..dat.sys
      .." sys)")
  end

  -- copy to clipboard
  printh(msg,"@clip")
  -- print + pause
  cls()
  stop(msg)
end

-- prof_one(func,[opts])
--
-- measures how many cpu cycles a single call to func costs,
-- relative to calling an empty function with the same arguments.
--
-- func: the function to measure
-- opts.n: how many times to call func (default 0x200)
-- opts.locals: list of values passed to func as arguments (default {})
--
-- returns a table {lua=,sys=,total=}, each in cycles per call:
--   total is derived from stat(1), sys from stat(2), and lua is total-sys
--
-- fails an assert when:
--   * n is too small (m would overflow)
--   * func measures cheaper than the empty function (negative difference)
--   * the difference is so large that the cycle count would overflow
function prof_one(func, opts)
  opts = opts or {}
  local n = opts.n or 0x200 --how many times to call func
  local locals = opts.locals or {} --locals to pass func

  -- we want to type
  --   local m = 0x80_0000/n
  -- but 8MHz is too large to fit in a pico-8 number,
  -- so we do (0x80_0000>>16)/(n>>16) instead
  -- (n is always an integer, so n>>16 won't lose any bits)
  local m = 0x80/(n>>16)
  assert(0x80/m << 16 == n, "n is too small") -- make sure m didn't overflow
  local fps = stat(8) -- framerate used as 1/F in the derivation below

  -- given three timestamps (pre-calibration, middle, post-measurement),
  --   calculate how many more CPU cycles func() took compared to noop()
  -- derivation:
  --   T := ((t2-t1)-(t1-t0))/n (frames)
  --     this is the extra time for each func call, compared to noop
  --     this is measured in #-of-frames -- it will be a small fraction for most ops
  --   F := 1/30 (seconds/frame) (or 1/60 if this test is running at 60fps)
  --     this is just the framerate that the tests run at, not the framerate of your game
  --   M := 256*256*128 = 0x80_0000 = 8MHz (cycles/second)
  --     (PICO-8 runs at 8MHz; see https://www.lexaloffle.com/dl/docs/pico-8_manual.html#CPU)
  --   cycles := T frames * F seconds/frame * M cycles/second
  -- optimization / working around pico-8's fixed point numbers:
  --   T2 := T*n = (t2-t1)-(t1-t0)
  --   M2 := M/n = (M>>16)/(n>>16) := m (e.g. when n is 0x1000, m is 0x800)
  --   cycles := T2*M2*F
  local function cycles(t0,t1,t2)
    -- (measure loop duration) minus (calibrate loop duration)
    local diff = (t2-t1)-(t1-t0)
    local e1 = "must use inline functions -- see usage guide"
    -- a negative diff means func was cheaper than the noop call
    assert(0<=diff,e1)
    -- largest diff for which diff*(m/fps) still fits in 0x7fff.ffff
    local thresh = 0x7fff.ffff/(m/fps)
    local e2 = "code is too large or slow -- try profiling manually with stat(1)"
    assert(diff<=thresh,e2)
    return diff*(m/fps)
  end

  local noop = function() end -- this must be local, because func is local
  flip() --avoid flipping mid-measurement
  -- the two loops below are identical except for the function called,
  -- so subtracting their durations cancels the loop and unpack() overhead
  local atot,asys=stat(1),stat(2)
  for _=1,n do noop(unpack(locals)) end -- calibrate
  local btot,bsys=stat(1),stat(2)
  for _=1,n do func(unpack(locals)) end -- measure
  local ctot,csys=stat(1),stat(2)

  -- gather results
  local tot=cycles(atot,btot,ctot)
  local sys=cycles(asys,bsys,csys)
  return {
    lua=tot-sys, -- whatever is left of the total after the system part
    sys=sys,
    total=tot,
  }
end

-->8
-- your code here

--edit me:
-- example: four candidate ways to
-- compute the same value from
-- (dx,dy). the first three
-- evaluate sqrt(n*n+1)*d with
-- d=larger and n=smaller/larger of
-- |dx|,|dy|, which algebraically
-- equals sqrt(dx*dx+dy*dy).
-- each snippet is measured and
-- reported on its own line.
prof(function(dx,dy)
  -- snippet 1: pick larger/smaller
  -- with max() and min()
  local d = max(abs(dx),abs(dy))
  local n = min(abs(dx),abs(dy)) / d
  return sqrt(n*n + 1) * d
end,function(dx, dy)
  -- snippet 2: swap so d>=n, then
  -- use ^0.5 for the square root
  local d,n=abs(dx),abs(dy)
  if (d<n) d,n=n,d
  n/=d
  return (n*n + 1)^0.5 * d
end,function(dx,dy)
  -- snippet 3: same as snippet 2
  -- but calls sqrt() instead
  local d,n=abs(dx),abs(dy)
  if (d<n) d,n=n,d
  n/=d
  return sqrt(n*n + 1) * d
end,function(dx,dy)
 -- snippet 4: trig-based variant;
 -- presumably yields the same
 -- length as the others (verify).
 -- s==0 is handled separately to
 -- avoid dividing by zero
 local s = sin(atan2(dx,dy))
 if (s==0) return abs(dx)
 return abs(dy/s)
end,{ locals={3,4} })

-- "locals" (optional) are
-- passed in as args. see the
-- usage guide for details.

__label__
00006660000000006660000000006660000006006000606066600000066060600660060000000000000000000000000000000000000000000000000000000000
00006000060000006060666000006000000060006000606060600600600060606000006000000000000000000000000000000000000000000000000000000000
00006660666000006060000000006660000060006000606066606660666066606660006000000000000000000000000000000000000000000000000000000000
00000060060000006060666000000060000060006000606060600600006000600060006000000000000000000000000000000000000000000000000000000000
00006660000000006660000000006660000006006660066060600000660066606600060000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00006060000000006660000000006060000006006000606066600000066060600660060000000000000000000000000000000000000000000000000000000000
00006060060000006060666000006060000060006000606060600600600060606000006000000000000000000000000000000000000000000000000000000000
00006660666000006060000000006660000060006000606066606660666066606660006000000000000000000000000000000000000000000000000000000000
00000060060000006060666000000060000060006000606060600600006000600060006000000000000000000000000000000000000000000000000000000000
00000060000000006660000000000060000006006660066060600000660066606600060000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
70000000888800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
07000000888800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00700000888800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
07000000888800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
70000000888800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

__sfx__
030100003052500505005050050500505005050050500505005050050500505005050050500505005050050500505005050050500505005050050500505005050050500505005050050500505005050050500505