COMPILERS

simple calculator grammar

-- grammar : 
-- expr   → term ((PLUS | MINUS) term)*
-- term   → factor ((MULT | DIV) factor)*
-- factor → NUM | LPAR expr RPAR

dragon book grammar

L -> I;L | eof
I -> id = E
E -> T+E | T-E | T
T -> F*T | F/T | F
F -> id | nb | (E)

simple calculator

-- INTERPRETER DESIGN
-- inspired by https://github.com/JacobLondon/InterpreterCalculator/blob/a8623488d734352d6754f9eff0a5a397f6d5dcb5/parse.py 


function isdigit(char)
	-- careful : 
	return char:match("%d") ~= nil
end

function print_table(table)
	for i=1, #table do
		io.write(table[i])
	end
	io.write("\n")
end



function Token(kind, value)
	return {
          kind = kind,
	  value = value
	}
end

function Lexer(text)
  local index = 1
  local current = text:sub(index, index)

  local function advance()
    index = index + 1

    if index > #text then
	    current = nil
    else 
    	current = text:sub(index, index)
    end
  end

  local function skip_whitespace()
	  while current ~= nil and current:sub(index, index) == ' ' do 
		  self.advance()
	  end
  end

  
  -- 
  local function number()
	  local digits = ''
	  while (not current == nil) or (isdigit(current)) or (current == '.') do
		  digits = digits..current
		  advance()
		  -- current can become nil here. But fonction will continue so we return if it-s the case
		  if current == nil then return tonumber(digits) end
	  end
	  return tonumber(digits)
  end

  local function next_token()

    while current ~= nil do

      if current == ' ' then
	      advance()
	      goto continue
      end

      if isdigit(current) then 
	      return Token('NUM', number()) 
      end

      if current == '*' then 
	      advance() 
	      return Token('MULT', '*') 
      end
      if current == '/' then 
	      advance()
	      return Token('DIV', '/') 
      end
      if current == '+' then 
	      advance()
	      return Token('PLUS', '+')
      end

      if current == '-' then 
	      advance()
	      return Token('MINUS', '-') 
      end
      if current == '(' then 
	      advance()
	      return Token('LPAR', '(') 
      end
      if current == ')' then 
	      advance()
	      return Token('RPAR', ')') 
      end

      error("invalid token")


      ::continue::
    end
    return Token("EOF", "EOF")
  end

  return {
    advance = advance,
    skip_whitespace = skip_whitespace,
    number = number,
    next_token = next_token,
    get_current = function() return current end,
    get_index = function() return index end
  }
end


-- grammar : 
-- expr   → term ((PLUS | MINUS) term)*
-- term   → factor ((MULT | DIV) factor)*
-- factor → NUM | LPAR expr RPAR
--

function Parser(lexer) 
	local current_token = lexer.next_token()

	function eat(token_kind) 
		if current_token.kind == token_kind then
			current_token = lexer.next_token()
		else 
			error("Invalid syntax")
		end
	end
	
	function factor()
		local token = current_token
		if token.kind == 'NUM' then
			eat('NUM')
			return token.value
		elseif token.kind == 'LPAR' then
			eat('LPAR')
			local result = expr()
			eat('RPAR')
			return result
		end
	end

	function term()
		local result = factor()
		while current_token.kind == 'MULT' or current_token.kind == 'DIV' do
			local token = current_token
			if token.kind == 'MULT' then
				eat('MULT')
				result = result*factor()
			elseif token.kind == 'DIV' then
				eat('DIV')
				local divider = factor()
				if divider == 0 then error("dividing by 0") end
				result = result/divider
			end
		end
		return result
	end

	function expr()
		local result = term()
		while current_token.kind == 'PLUS' or current_token.kind == 'MINUS' do
			local token = current_token
			if token.kind == 'PLUS' then
				eat('PLUS')
				result = result+term()
			elseif token.kind == 'MINUS' then
				eat('MINUS')
				result = result - term()
			end
		end
		return result
	end

	function read()
		return expr
	end

	return {
		eat = eat,
		factor = factor,
		term = term,
		expr = expr,
		read = read
	}
end




program = "22  + 3.5  * ( 4      - 1)    "



-- TESTS

-- isdigit()
assert(isdigit("1"))
assert(not isdigit("A"))
assert(not isdigit("."))
assert(not isdigit("+"))
assert(not isdigit(" "))
assert(isdigit("9"))
assert(isdigit("0"))


-- Token
local tok = Token('NUM', 72)
assert(tok.kind == 'NUM' and tok.value == 72)


-- Lexer
local lex = Lexer(program)

for i=1, #program do
	assert(lex.get_index() == i)
	assert(lex.get_current() == program:sub(i,i))
	lex.advance()
end

local lex = Lexer(program)
repeat 
	tok = lex.next_token()
until tok.kind == "EOF"




-- Parser
local lex2 = Lexer(program)
local parser = Parser(lex2)
assert(parser.expr() == 32.5)

local program2 = "(((5 + 3) * (9 - 4)) / (2 + 3)) + ((12 * 2) - (4 * 3)) + ((18 / 3) + (6 * 2)) - ((8 + 4) / (2 * 1))"
local lex3 = Lexer(program2)
local parser2 = Parser(lex3)
assert(parser2.expr() == 32)

local program3 = "5+1*92"
local lex4 = Lexer(program3)
local parser3 = Parser(lex4)
assert(parser3.expr() == 97)

Calculator with AST

-- INTERPRETER DESIGN
-- inspired by https://github.com/JacobLondon/InterpreterCalculator/blob/a8623488d734352d6754f9eff0a5a397f6d5dcb5/parse.py 


function isdigit(char)
	return char:match("%d") ~= nil
end

function Token(kind, value)
	return {
		kind = kind,
		value = value
	}
end

function Node(value, op, left, right)
	local node = {
		value = value,
		op = op,
		left = left,
		right = right,
	}
	node.isLeaf = function()
		return node.value ~= nil
	end

	return node
end

function Lexer(text)
	local index = 1
	local current = text:sub(index, index)
	
	local function advance()
		index = index + 1
	
		if index > #text then
		        current = nil
		else 
			current = text:sub(index, index)
		end
	end
	
	local function skip_whitespace()
	        while current ~= nil and current:sub(index, index) == ' ' do 
	      		self.advance()
	        end
	end
	
	local function number()
	        local digits = ''
	        while (not current == nil) or (isdigit(current)) or (current == '.') do
	      		digits = digits..current
	      		advance()
	      		-- current can become nil here. But fonction will continue so we return if it-s the case
	      		if current == nil then return tonumber(digits) end
	        end
	        return tonumber(digits)
	end
	
	local function next_token()
		while current ~= nil do
		
			if current == ' ' then
			        advance()
			        goto continue
			end
		
			if isdigit(current) then 
			        return Token('NUM', number()) 
			end
		
			if current == '*' then 
			        advance() 
			        return Token('MULT', '*') 
			end
			if current == '/' then 
			        advance()
			        return Token('DIV', '/') 
			end
			if current == '+' then 
			        advance()
			        return Token('PLUS', '+')
			end
		
			if current == '-' then 
			        advance()
			        return Token('MINUS', '-') 
			end
			if current == '(' then 
			        advance()
			        return Token('LPAR', '(') 
			end
			if current == ')' then 
			        advance()
			        return Token('RPAR', ')') 
			end
		
			error("invalid token")
		
		
			::continue::
		end
	return Token("EOF", "EOF")
	end
	
	return {
		advance = advance,
		skip_whitespace = skip_whitespace,
		number = number,
		next_token = next_token
	}
end

function Parser(lexer) 
	local current_token = lexer.next_token()

	function eat(token_kind) 
		if current_token.kind == token_kind then
			current_token = lexer.next_token()
		else 
			error("Invalid syntax")
		end
	end
	
	function factor()
		local token = current_token
		if token.kind == 'NUM' then
			eat('NUM')
			return Node(token.value, nil, nil, nil)
		elseif token.kind == 'LPAR' then
			eat('LPAR')
			local node = expr()
			eat('RPAR')
			return node
		end
	end

	function term()
		local node = factor()
		while current_token.kind == 'MULT' or current_token.kind == 'DIV' do
			local token = current_token
			if token.kind == 'MULT' then
				eat('MULT')
			elseif token.kind == 'DIV' then
				eat('DIV')
			end
			node = Node(nil, token.value, node, factor()) 
		end
		return node
	end

	function expr()
		local node = term()
		while current_token.kind == 'PLUS' or current_token.kind == 'MINUS' do
			local token = current_token
			if token.kind == 'PLUS' then
				eat('PLUS')

			elseif token.kind == 'MINUS' then
				eat('MINUS')
			end
			node = Node(nil, token.value, node, term())

		end
		return node 
	end

	function read()
		return expr()
	end

	return {
		eat = eat,
		factor = factor,
		term = term,
		expr = expr,
		read = read
	}
end

function eval(node)
	if node == nil then return end --careful here

	if node.isLeaf() then return node.value end

	local left_val = eval(node.left)
	local right_val = eval(node.right)

	if node.op == '+' then return left_val+right_val end
	if node.op == '-' then return left_val-right_val end
	if node.op == '*' then return left_val*right_val end
	if node.op == '/' then return left_val/right_val end

	error("unknown op")

end


local program = "(((5 + 3 * 2) * (9 - 4)) / (2 + 3)) + ((12 * 2) - (4 * 3)) + ((18 / 3 + 1) + (6 * 2)) - ((8 + 4 - 1 + 3) / (2 * 1))"


-- Parser
local lex2 = Lexer(program)
local parser = Parser(lex2)
local node = parser.expr()
assert(eval(node) == 35)

Calculator + AST + lexer knows identifiers

-- INTERPRETER DESIGN
-- inspired by https://github.com/JacobLondon/InterpreterCalculator/blob/a8623488d734352d6754f9eff0a5a397f6d5dcb5/parse.py 


function isdigit(str)
	return str:match("^%d+$") ~= nil
end

function isalpha(str)
	return str:match("^[A-Za-z]+$") ~= nil
end



assert(isdigit('8'))
assert(isdigit('234674'))
assert(not isdigit('A'))
assert(not isdigit('a'))
assert(not isdigit('.'))
assert(not isdigit(' '))
assert(not isdigit('24R3'))

assert(isalpha('A'))
assert(isalpha('a'))
assert(isalpha('aAfvfqsdAR'))
assert(isalpha('foo'))
assert(not isalpha('aAfv1qsdAR2'))
assert(not isalpha('1'))
assert(not isalpha('9'))
assert(not isalpha('+'))
assert(not isalpha(' '))
assert(not isalpha('.'))



function Token(kind, value)
	return {
		kind = kind,
		value = value
	}
end

function Node(value, op, left, right)
	local node = {
		value = value,
		op = op,
		left = left,
		right = right,
	}
	node.isLeaf = function()
		return node.value ~= nil
	end

	return node
end

function Lexer(text)
	local index = 1
	local current = text:sub(index, index)
	
	local function advance()
		index = index + 1
	
		if index > #text then
		        current = nil
		else 
			current = text:sub(index, index)
		end
	end
	
	local function skip_whitespace()
	        while current ~= nil and current:sub(index, index) == ' ' do 
	      		self.advance()
	        end
	end
	
	local function number()
	        local digits = ''

		-- we also keep isalpha in the while. It means there is a number then an alpha char. We then throw error message in this case
	        while (not current == nil) or (isdigit(current)) or (current == '.') or (isalpha(current)) do
			-- error message if char inside number
			if isalpha(current) then 
				error("lex error, char in number : "..current)
			end
	      		digits = digits..current
	      		advance()
	      		-- current can become nil here. But fonction will continue so we return if it-s the case
	      		if current == nil then return tonumber(digits) end
	        end
	        return tonumber(digits)
	end

	local function string()
		local letters = ''

		-- we keep going if isdigit, because we allow identifier like foo123
		while (not current == nil) or (isalpha(current)) or (isdigit(current)) do
			letters = letters..current
			advance()
			if current == nil then return letters end
		end
		return letters
	end
	
	local function next_token()
		while current ~= nil do
			if current == ' ' then
			        advance()
			        goto continue
			end
		
			if isdigit(current) then 
			        return Token('NUM', number()) 
			end

			if isalpha(current) then 
			        return Token('STR', string()) 
			end
		
			if current == '*' then 
			        advance() 
			        return Token('MULT', '*') 
			end
			if current == '/' then 
			        advance()
			        return Token('DIV', '/') 
			end
			if current == '+' then 
			        advance()
			        return Token('PLUS', '+')
			end
		
			if current == '-' then 
			        advance()
			        return Token('MINUS', '-') 
			end
			if current == '(' then 
			        advance()
			        return Token('LPAR', '(') 
			end
			if current == ')' then 
			        advance()
			        return Token('RPAR', ')') 
			end
		
			error("invalid token : "..current)
		
			::continue::
		end
	return Token("EOF", "EOF")
	end
	
	return {
		next_token = next_token
	}
end


function Parser(lexer) 
	local current_token = lexer.next_token()

	function eat(token_kind) 
		if current_token.kind == token_kind then
			current_token = lexer.next_token()
		else 
			error("Invalid syntax")
		end
	end
	
	function factor()
		local token = current_token
		if token.kind == 'NUM' then
			eat('NUM')
			return Node(token.value, nil, nil, nil)
		elseif token.kind == 'LPAR' then
			eat('LPAR')
			local node = expr()
			eat('RPAR')
			return node
		end
	end

	function term()
		local node = factor()
		while current_token.kind == 'MULT' or current_token.kind == 'DIV' do
			local token = current_token
			if token.kind == 'MULT' then
				eat('MULT')
			elseif token.kind == 'DIV' then
				eat('DIV')
			end
			node = Node(nil, token.value, node, factor()) 
		end
		return node
	end

	function expr()
		local node = term()
		while current_token.kind == 'PLUS' or current_token.kind == 'MINUS' do
			local token = current_token
			if token.kind == 'PLUS' then
				eat('PLUS')

			elseif token.kind == 'MINUS' then
				eat('MINUS')
			end
			node = Node(nil, token.value, node, term())

		end
		return node 
	end

	function read()
		return expr()
	end

	return {
		eat = eat,
		factor = factor,
		term = term,
		expr = expr,
		read = read
	}
end

function eval(node)
	if node == nil then return end --careful here

	if node.isLeaf() then return node.value end

	local left_val = eval(node.left)
	local right_val = eval(node.right)

	if node.op == '+' then return left_val+right_val end
	if node.op == '-' then return left_val-right_val end
	if node.op == '*' then return left_val*right_val end
	if node.op == '/' then return left_val/right_val end

	error("unknown op "..node.op)

end


function test_lexer(program)
	local lex = Lexer(program)
	repeat
		local token = lex.next_token()
		print(token.kind.."    "..token.value)
	until (token.kind == "EOF")
end

test_lexer("jfam + 4577 * a24b +aaab41574")

local program = "(((5 + 3 * 2) * (9 - 4)) / (2 + 3)) + ((12 * 2) - (4 * 3)) + ((18 / 3 + 1) + (6 * 2)) - ((8 + 4 - 1 + 3) / (2 * 1))"

local lex = Lexer(program)
local parser = Parser(lex)
local node = parser.expr()
assert(eval(node) == 35)