Hack your own Erlang VM.

List of episodes

  1. BEAM files, basics of code loading, index tables
  2. register- vs. stack-based VM, bytecode interpreter
  3. processes, basic scheduling

Life of a .erl file

  1. your pretty Erlang module
  2. preprocessing
    compile:file(File, ['P']).
  3. expansion
    compile:file(File, ['E']).
  4. Core Erlang
    compile:file(File, [to_core]).
  5. Kernel Erlang
    compile:file(File, [to_kernel]).
  6. Assembly
    compile:file(File, ['S']).
  7. BEAM file
    compile:file(File).

Erlang source

-module(fac).
-export([fac/1]).
-include("fac.hrl").

fac(N) ->
    fac(N, #state{acc=1}).

fac(0, #state{acc=Acc}) ->
    Acc;
fac(N, #state{acc=Acc}=State) ->
    fac(N-1, State#state{acc=Acc*N}).

After preprocessing

-file("fac.erl", 1).

-module(fac).

-export([fac/1]).

-file("fac.hrl", 1).

-record(state,{acc}).

-file("fac.erl", 6).

fac(N) ->
    fac(N, #state{acc = 1}).

fac(0, #state{acc = Acc}) ->
    Acc;
fac(N, #state{acc = Acc} = State) ->
    fac(N - 1, State#state{acc = Acc * N}).

Expanded records (still Erlang)

-file("fac.erl", 1).
-file("fac.hrl", 1).
-file("fac.erl", 6).

fac(N) ->
    fac(N, {state,1}).

fac(0, {state,Acc}) ->
    Acc;
fac(N, {state,Acc} = State) ->
    fac(N - 1,
        begin
            rec0 = Acc * N,
            rec1 = State,
            case rec1 of
                {state,rec2} ->
                    {state,rec0};
                _ ->
                    error({badrecord,state})
            end
        end).

module_info() ->
    erlang:get_module_info(fac).

module_info(X) ->
    erlang:get_module_info(fac, X).

Core Erlang

module 'fac' ['fac'/1,
	      'module_info'/0,
	      'module_info'/1]
    attributes []
'fac'/1 =
    %% Line 7
    fun (_cor0) ->
	%% Line 8
	apply 'fac'/2
	    (_cor0, {'state',1})
'fac'/2 =
    %% Line 10
    fun (_cor1,_cor0) ->
	case <_cor1,_cor0> of
	  <0,{'state',Acc}> when 'true' ->
	      %% Line 11
	      Acc
	  %% Line 12
	  <N,State = {'state',Acc}> when 'true' ->
	      let <_cor6> =
		  %% Line 13

Kernel Erlang

module 'fac'
export ['fac'/1,
	'module_info'/0,
	'module_info'/1]
attributes []
fdef 'fac'/1(_cor0) =
  enter (local 'fac'/2)(_cor0, {state,1})
fdef 'fac'/2(_cor1, _cor0) =
  match _cor1,_cor0
    alt
      select _cor1
	type k_int
	  0 ->
	    select _cor0
	      type k_tuple
		{_ker6,_ker5} ->
		  select _ker6
		    type k_atom
		      'state' ->
			<<_ker5>>
      alt
	select _cor0
	  type k_tuple
	    {_ker4,_ker3} ->
	      select _ker4

Assembly

{module, fac}.  %% version = 0
{exports, [{fac,1},{module_info,0},{module_info,1}]}.
{attributes, []}.
{labels, 10}.

{function, fac, 1, 2}.
  {label,1}.
    {line,[{location,"fac.erl",7}]}.
    {func_info,{atom,fac},{atom,fac},1}.
  {label,2}.
    {move,{literal,{state,1}},{x,1}}.
    {call_only,2,{f,4}}.

{function, fac, 2, 4}.
  {label,3}.
    {line,[{location,"fac.erl",10}]}.
    {func_info,{atom,fac},{atom,fac},2}.
  {label,4}.
    {test,is_eq_exact,{f,5},[{x,0},{integer,0}]}.
    {test,is_tuple,{f,3},[{x,1}]}.
    {test,test_arity,{f,5},[{x,1},2]}.

BEAM file


<<70,79,82,49,
  0,0,2,136,
  66,69,65,77,
  65,116,111,109,
  0,0,0,53
  ...>>

BEAM file


<<"FOR1",
  0,0,2,136,
  66,69,65,77,
  65,116,111,109,
  0,0,0,53
  ...>>

BEAM file


<<"FOR1",
  648:32,
  66,69,65,77,
  65,116,111,109,
  0,0,0,53
  ...>>

BEAM file


<<"FOR1",
  648:32,
  "BEAM",
  65,116,111,109,
  0,0,0,53
  ...>>

BEAM file


<<"FOR1",
  648:32,
  "BEAM",
  "Atom",
  0,0,0,53
  ...>>

Obligatory chunks

Other chunks

Atoms

-module(fac).
-export([fac/1]).
-include("fac.hrl").

fac(N) ->
    fac(N, #state{acc=1}).

fac(0, #state{acc=Acc}) ->
    Acc;
fac(N, #state{acc=Acc}=State) ->
    fac(N-1, State#state{acc=Acc*N}).

Atoms

-module(fac).
-export([fac/1]).
-include("fac.hrl").

fac(N) ->
    fac(N, #state{acc=1}).

fac(0, #state{acc=Acc}) ->
    Acc;
fac(N, #state{acc=Acc}=State) ->
    fac(N-1, State#state{acc=Acc*N}).

Atoms

-module(fac).
-export([fac/1]).
-include("fac.hrl").

fac(N) ->
    fac(N, #state{acc=1}).

fac(0, #state{acc=Acc}) ->
    Acc;
fac(N, #state{acc=Acc}=State) ->
    fac(N-1, State#state{acc=Acc*N}).

Atoms


fac(N) ->
    fac(N, {state,1}).

fac(0, {state,Acc}) ->
    Acc;
fac(N, {state,Acc} = State) ->
    fac(N - 1,
        begin
            rec0 = Acc * N,
            rec1 = State,
            case rec1 of
                {state,rec2} ->
                    {state,rec0};
                _ ->
                    error({badrecord,state})
            end
        end).

module_info() ->
    erlang:get_module_info(fac).

module_info(X) ->
    erlang:get_module_info(fac, X).

Atoms

Atoms

<<0,0,0,7,
  3,102,97,99,
  5,115,116,97,116,101,
  6,101,114,108,97,110,103,
  1,45,
  1,42,
  11,109,111,100,117,108,101,95,105,110,102,111,
  15,103,101,116,95,109,111,100,117,108,101,95,105,110,102,111>>

Atoms

<<0,0,0,7,
  3,102,97,99,
  5,115,116,97,116,101,
  6,101,114,108,97,110,103,
  1,45,
  1,42,
  11,109,111,100,117,108,101,95,105,110,102,111,
  15,103,101,116,95,109,111,100,117,108,101,95,105,110,102,111>>

Atoms

<<0,0,0,7,
  3,"fac",
  5,115,116,97,116,101,
  6,101,114,108,97,110,103,
  1,45,
  1,42,
  11,109,111,100,117,108,101,95,105,110,102,111,
  15,103,101,116,95,109,111,100,117,108,101,95,105,110,102,111>>

Atoms

<<0,0,0,7,
  3,"fac",
  5,"state",
  6,101,114,108,97,110,103,
  1,45,
  1,42,
  11,109,111,100,117,108,101,95,105,110,102,111,
  15,103,101,116,95,109,111,100,117,108,101,95,105,110,102,111>>

Atoms

<<0,0,0,7,
  3,"fac",
  5,"state",
  6,"erlang",
  1,45,
  1,42,
  11,109,111,100,117,108,101,95,105,110,102,111,
  15,103,101,116,95,109,111,100,117,108,101,95,105,110,102,111>>

Atoms

32> beam_lib:chunks(fac, ["Atom"]).
{ok,{fac,[{"Atom",
           <<0,0,0,7,3,102,97,99,5,115,116,97,116,101,6,101,114,
             108,97,110,103,1,...>>}]}}

Atoms

33> beam_lib:chunks(fac, [atoms]).
{ok,{fac,[{atoms,[{1,fac},
                  {2,state},
                  {3,erlang},
                  {4,'-'},
                  {5,'*'},
                  {6,module_info},
                  {7,get_module_info}]}]}}

Index tables

Index tables

Trivia time

Q: Given a freshly started Erlang/OTP (17.3), how many atoms are loaded into the atom table?

A: 6842


Q: The same, but with MongooseIM started?

A: 16787

Tagging scheme

List representation

Tuple representation

What is code loading (in simple terms)?

Exercises

  1. implement the atom table
  2. implement the code table*
  3. implement the export table

References

/