(* A compiler from micro-C, a fraction of the C language, to an abstract machine. Direct (forwards) compilation without optimization of jumps to jumps, tail-calls etc. sestoft@dina.kvl.dk * 2001-03-28, 2002-03-19 To compile this compiler and use it: * compile the micro-C lexer and parser specifications, then * mosmlc -I .. -c Machine.sml * mosml -P full -I .. parse.sml comp.sml * compile2file (parsef "ex11.c") "out" * java Machine out 8 A value is an integer; it may represent an integer or a pointer, where a pointer is just an address in the store (of a variable or pointer or the base address of an array). The environment maps a variable to an address (location), and the store maps a location to an integer. This freely permits pointer arithmetics, as in real C. Expressions can have side effects. A function takes a list of typed arguments and may optionally return a result. Arrays can be one-dimensional and constant-size only. For simplicity, we represent an array as a variable which holds the address of the first array element. This is consistent with the way array-type parameters are handled in C, but not with the way array-type variables are handled. Actually, this was how B (the predecessor of C) represented array variables. The store behaves as a stack, so all data are stack allocated: variables, function parameters and arrays. *) app load ["Absyn", "Env", "Machine"]; open Absyn Env Machine; (* ------------------------------------------------------------------- *) (* A global variable has a fixed address, a local one has an offset: *) datatype var = Glovar of int (* absolute address in stack *) | Locvar of int (* address relative to bottom of frame *) (* The compile-time environment also keeps track of next available offset: *) type venv = (string, var * typ) env * int type fenv = (string, label * typ option * paramdec list) env (* Bind declared variable in env and generate code to allocate it: *) fun allocate (kind : int -> var) (typ, x) (venv : venv) : venv * instr list = let val (env, fdepth) = venv in case typ of TypA (TypA _, _) => raise Fail "allocate: arrays of arrays not permitted" | TypA (t, SOME (Cst (CstI i))) => ((bind1 env (x, (kind (fdepth+i), typ)), fdepth+i+1), [INCSP i, GETSP, CSTI (i-1), SUB]) | TypA (t, e) => raise Fail "allocate: dynamic array size" | _ => ((bind1 env (x, (kind (fdepth), typ)), fdepth+1), [CSTI 0]) end (* Bind declared parameter in env: *) fun bindparam (typ, x) (env, fdepth) : venv = (bind1 env (x, (Locvar fdepth, typ)), fdepth+1) fun bindparams params ((env, fdepth) : venv) : venv = List.foldl (fn (param, res) => bindparam param res) (env, fdepth) params; (* ------------------------------------------------------------------- *) (* Global environments for variables and functions *) local val glofenv = ref Env.empty : fenv ref in fun mkvenv (topdecs : topdec list) : venv * instr list = let fun addv [] env glosize = ((env, glosize), []) | addv (Vardec typx :: tdr) env glosize = let val ((env1, glosize1), code1) = allocate Glovar typx (env, glosize) val ((envr, glosizer), coder) = addv tdr env1 glosize1 in ((envr, glosizer), code1 @ coder) end | addv (Fundec _ :: tdr) env glosize = addv tdr env glosize in addv topdecs Env.empty 0 end; fun mkfenv (topdecs : topdec list) : unit = let fun addf [] fenv = fenv | addf (Fundec(tyOpt, f, xs, body) :: tdr) fenv = addf tdr (bind1 fenv (f, (newLabel(), tyOpt, xs))) | addf (Vardec _ :: tdr) fenv = addf tdr fenv in glofenv := addf topdecs Env.empty end; fun lookupglobalfun f = (lookup (!glofenv) f) handle Subscript => raise Fail ("Function " ^ f ^ " not found") end (* ------------------------------------------------------------------- *) (* Compiling uC statements *) fun cStmt stmt (env : venv) : instr list = case stmt of If(e, stmt1, stmt2) => let val labelse = newLabel() val labend = newLabel() val code1 = cStmt stmt1 env val code2 = cStmt stmt2 env in cExpr e env @ [IFZERO labelse] @ code1 @ [GOTO labend] @ [Label labelse] @ code2 @ [Label labend] end | While(e, body) => let val labbegin = newLabel() val labtest = newLabel() val codebody = cStmt body env in [GOTO labtest, Label labbegin] @ codebody @ [Label labtest] @ cExpr e env @ [IFNZRO labbegin] end | Expr e => cExpr e env @ [INCSP ~1] | Block stmts => let fun loop [] env = (#2 env, []) | loop (s1::sr) env = let val (env1, code1) = cStmtOrDec s1 env val (fdepthr, coder) = loop sr env1 in (fdepthr, code1 @ coder) end val (fdepthend, code) = loop stmts env in code @ [INCSP(#2 env - fdepthend)] end | Return NONE => [RET (#2 env - 1)] | Return (SOME e) => cExpr e env @ [RET (#2 env)] and cStmtOrDec (Stmt stmt) env : venv * instr list = (env, cStmt stmt env) | cStmtOrDec (Dec (typ, x)) env = allocate Locvar (typ, x) env (* Compiling uC expressions: * e is the expression to compile * env is the compile-time environment Net effect principle: if the compilation (cExpr e env) of expression e returns the instruction sequence instrs, then the execution of instrs will leave the value of expression e on the stack top (and thus extend the current stack frame with one element). *) and cExpr (e : expr) (env : venv) : instr list = case e of Access acc => cAccess acc env @ [LDI] | Assign(acc, e) => cAccess acc env @ cExpr e env @ [STI] | Cst (CstI i) => [CSTI i] | Cst CstN => [CSTI 0] | Addr acc => cAccess acc env | Prim1(ope, e1) => cExpr e1 env @ (case ope of "!" => [NOT] | "printi" => [PRINTI] | "printc" => [PRINTC] | _ => raise Fail "unknown primitive 1") | Prim2(ope, e1, e2) => cExpr e1 env @ cExpr e2 env @ (case ope of "*" => [MUL] | "+" => [ADD] | "-" => [SUB] | "/" => [DIV] | "%" => [MOD] | "==" => [EQ] | "!=" => [EQ, NOT] | "<" => [LT] | ">=" => [LT, NOT] | ">" => [SWAP, LT] | "<=" => [SWAP, LT, NOT] | _ => raise Fail "unknown primitive 2") | Andalso(e1, e2) => let val labend = newLabel() val labfalse = newLabel() in cExpr e1 env @ [IFZERO labfalse] @ cExpr e2 env @ [GOTO labend, Label labfalse, CSTI 0, Label labend] end | Orelse(e1, e2) => let val labend = newLabel() val labtrue = newLabel() in cExpr e1 env @ [IFNZRO labtrue] @ cExpr e2 env @ [GOTO labend, Label labtrue, CSTI 1, Label labend] end | Call(f, es) => callfun f es env (* Generate code to access variable, dereference pointer or index array: *) and cAccess (AccVar x) env = (case lookup (#1 env) x of (Glovar addr, _) => [CSTI addr] | (Locvar addr, _) => [GETBP, CSTI addr, ADD]) | cAccess (AccDeref e) env = cExpr e env | cAccess (AccIndex(acc, idx)) env = cAccess acc env @ [LDI] @ cExpr idx env @ [ADD] (* Generate code to evaluate expressions es: *) and cExprs es env = List.concat(List.map (fn e => cExpr e env) es) (* Generate code to evaluate arguments es and then call function f: *) and callfun f es env : instr list = let val (labf, tyOpt, paramdecs) = lookupglobalfun f val argc = List.length es in if argc = List.length paramdecs then cExprs es env @ [CALL(argc, labf)] else raise Fail (f ^ ": parameter/argument mismatch") end (* Generate code for all uC functions, and code to invoke main() *) fun cProgram (Prog topdecs) : instr list * instr list = let val _ = resetLabels () val ((globalenv, globalsize), globalinit) = mkvenv topdecs val _ = mkfenv topdecs fun compilefun (tyOpt, f, xs, body) = let val (labf, _, params) = lookupglobalfun f val (envf, fdepthf) = bindparams params (globalenv, 0) val code = cStmt body (envf, fdepthf) in [Label labf] @ code @ [RET (List.length params-1)] end val functions = List.mapPartial (fn Fundec fundec => SOME (compilefun fundec) | Vardec _ => NONE) topdecs val (mainlab, _, mainparams) = lookupglobalfun "main" val argc = List.length mainparams in (globalinit @ [STOP], [CALL(argc, mainlab), STOP] @ List.concat functions) end; (* Compile the program (in abstract syntax) and write it to file fname; also, return the program as a list of instructions. *) fun compile2file program fname = let val (globalinit, functions) = cProgram program val initcode = Machine.code2ints 1 globalinit val funcstart = length initcode + 1 val funccode = Machine.code2ints funcstart functions in intstofile (funcstart :: initcode @ funccode) fname; functions end; (* Example programs are found in the files ex1.c, ex2.c, etc *)