-- grammar :
-- expr → term ((PLUS | MINUS) term)*
-- term → factor ((MULT | DIV) factor)*
-- factor → NUM | LPAR expr RPAR
dragon book grammar
L -> I;L | eof
I -> id = E
E -> T+E | T-E | T
T -> F*T | F/T | F
F -> id | nb | (E)
simple calculator
-- INTERPRETER DESIGN
-- inspired by https://github.com/JacobLondon/InterpreterCalculator/blob/a8623488d734352d6754f9eff0a5a397f6d5dcb5/parse.py
function isdigit(char)
-- careful :
return char:match("%d") ~= nil
end
function print_table(table)
for i=1, #table do
io.write(table[i])
end
io.write("\n")
end
function Token(kind, value)
return {
kind = kind,
value = value
}
end
function Lexer(text)
local index = 1
local current = text:sub(index, index)
local function advance()
index = index + 1
if index > #text then
current = nil
else
current = text:sub(index, index)
end
end
local function skip_whitespace()
while current ~= nil and current:sub(index, index) == ' ' do
self.advance()
end
end
--
local function number()
local digits = ''
while (not current == nil) or (isdigit(current)) or (current == '.') do
digits = digits..current
advance()
-- current can become nil here. But fonction will continue so we return if it-s the case
if current == nil then return tonumber(digits) end
end
return tonumber(digits)
end
local function next_token()
while current ~= nil do
if current == ' ' then
advance()
goto continue
end
if isdigit(current) then
return Token('NUM', number())
end
if current == '*' then
advance()
return Token('MULT', '*')
end
if current == '/' then
advance()
return Token('DIV', '/')
end
if current == '+' then
advance()
return Token('PLUS', '+')
end
if current == '-' then
advance()
return Token('MINUS', '-')
end
if current == '(' then
advance()
return Token('LPAR', '(')
end
if current == ')' then
advance()
return Token('RPAR', ')')
end
error("invalid token")
::continue::
end
return Token("EOF", "EOF")
end
return {
advance = advance,
skip_whitespace = skip_whitespace,
number = number,
next_token = next_token,
get_current = function() return current end,
get_index = function() return index end
}
end
-- grammar :
-- expr → term ((PLUS | MINUS) term)*
-- term → factor ((MULT | DIV) factor)*
-- factor → NUM | LPAR expr RPAR
--
function Parser(lexer)
local current_token = lexer.next_token()
function eat(token_kind)
if current_token.kind == token_kind then
current_token = lexer.next_token()
else
error("Invalid syntax")
end
end
function factor()
local token = current_token
if token.kind == 'NUM' then
eat('NUM')
return token.value
elseif token.kind == 'LPAR' then
eat('LPAR')
local result = expr()
eat('RPAR')
return result
end
end
function term()
local result = factor()
while current_token.kind == 'MULT' or current_token.kind == 'DIV' do
local token = current_token
if token.kind == 'MULT' then
eat('MULT')
result = result*factor()
elseif token.kind == 'DIV' then
eat('DIV')
local divider = factor()
if divider == 0 then error("dividing by 0") end
result = result/divider
end
end
return result
end
function expr()
local result = term()
while current_token.kind == 'PLUS' or current_token.kind == 'MINUS' do
local token = current_token
if token.kind == 'PLUS' then
eat('PLUS')
result = result+term()
elseif token.kind == 'MINUS' then
eat('MINUS')
result = result - term()
end
end
return result
end
function read()
return expr
end
return {
eat = eat,
factor = factor,
term = term,
expr = expr,
read = read
}
end
program = "22 + 3.5 * ( 4 - 1) "
-- TESTS
-- isdigit()
assert(isdigit("1"))
assert(not isdigit("A"))
assert(not isdigit("."))
assert(not isdigit("+"))
assert(not isdigit(" "))
assert(isdigit("9"))
assert(isdigit("0"))
-- Token
local tok = Token('NUM', 72)
assert(tok.kind == 'NUM' and tok.value == 72)
-- Lexer
local lex = Lexer(program)
for i=1, #program do
assert(lex.get_index() == i)
assert(lex.get_current() == program:sub(i,i))
lex.advance()
end
local lex = Lexer(program)
repeat
tok = lex.next_token()
until tok.kind == "EOF"
-- Parser
local lex2 = Lexer(program)
local parser = Parser(lex2)
assert(parser.expr() == 32.5)
local program2 = "(((5 + 3) * (9 - 4)) / (2 + 3)) + ((12 * 2) - (4 * 3)) + ((18 / 3) + (6 * 2)) - ((8 + 4) / (2 * 1))"
local lex3 = Lexer(program2)
local parser2 = Parser(lex3)
assert(parser2.expr() == 32)
local program3 = "5+1*92"
local lex4 = Lexer(program3)
local parser3 = Parser(lex4)
assert(parser3.expr() == 97)
Calculator with AST
-- INTERPRETER DESIGN
-- inspired by https://github.com/JacobLondon/InterpreterCalculator/blob/a8623488d734352d6754f9eff0a5a397f6d5dcb5/parse.py
function isdigit(char)
return char:match("%d") ~= nil
end
function Token(kind, value)
return {
kind = kind,
value = value
}
end
function Node(value, op, left, right)
local node = {
value = value,
op = op,
left = left,
right = right,
}
node.isLeaf = function()
return node.value ~= nil
end
return node
end
function Lexer(text)
local index = 1
local current = text:sub(index, index)
local function advance()
index = index + 1
if index > #text then
current = nil
else
current = text:sub(index, index)
end
end
local function skip_whitespace()
while current ~= nil and current:sub(index, index) == ' ' do
self.advance()
end
end
local function number()
local digits = ''
while (not current == nil) or (isdigit(current)) or (current == '.') do
digits = digits..current
advance()
-- current can become nil here. But fonction will continue so we return if it-s the case
if current == nil then return tonumber(digits) end
end
return tonumber(digits)
end
local function next_token()
while current ~= nil do
if current == ' ' then
advance()
goto continue
end
if isdigit(current) then
return Token('NUM', number())
end
if current == '*' then
advance()
return Token('MULT', '*')
end
if current == '/' then
advance()
return Token('DIV', '/')
end
if current == '+' then
advance()
return Token('PLUS', '+')
end
if current == '-' then
advance()
return Token('MINUS', '-')
end
if current == '(' then
advance()
return Token('LPAR', '(')
end
if current == ')' then
advance()
return Token('RPAR', ')')
end
error("invalid token")
::continue::
end
return Token("EOF", "EOF")
end
return {
advance = advance,
skip_whitespace = skip_whitespace,
number = number,
next_token = next_token
}
end
function Parser(lexer)
local current_token = lexer.next_token()
function eat(token_kind)
if current_token.kind == token_kind then
current_token = lexer.next_token()
else
error("Invalid syntax")
end
end
function factor()
local token = current_token
if token.kind == 'NUM' then
eat('NUM')
return Node(token.value, nil, nil, nil)
elseif token.kind == 'LPAR' then
eat('LPAR')
local node = expr()
eat('RPAR')
return node
end
end
function term()
local node = factor()
while current_token.kind == 'MULT' or current_token.kind == 'DIV' do
local token = current_token
if token.kind == 'MULT' then
eat('MULT')
elseif token.kind == 'DIV' then
eat('DIV')
end
node = Node(nil, token.value, node, factor())
end
return node
end
function expr()
local node = term()
while current_token.kind == 'PLUS' or current_token.kind == 'MINUS' do
local token = current_token
if token.kind == 'PLUS' then
eat('PLUS')
elseif token.kind == 'MINUS' then
eat('MINUS')
end
node = Node(nil, token.value, node, term())
end
return node
end
function read()
return expr()
end
return {
eat = eat,
factor = factor,
term = term,
expr = expr,
read = read
}
end
function eval(node)
if node == nil then return end --careful here
if node.isLeaf() then return node.value end
local left_val = eval(node.left)
local right_val = eval(node.right)
if node.op == '+' then return left_val+right_val end
if node.op == '-' then return left_val-right_val end
if node.op == '*' then return left_val*right_val end
if node.op == '/' then return left_val/right_val end
error("unknown op")
end
local program = "(((5 + 3 * 2) * (9 - 4)) / (2 + 3)) + ((12 * 2) - (4 * 3)) + ((18 / 3 + 1) + (6 * 2)) - ((8 + 4 - 1 + 3) / (2 * 1))"
-- Parser
local lex2 = Lexer(program)
local parser = Parser(lex2)
local node = parser.expr()
assert(eval(node) == 35)
Calculator + AST + lexer knows identifiers
-- INTERPRETER DESIGN
-- inspired by https://github.com/JacobLondon/InterpreterCalculator/blob/a8623488d734352d6754f9eff0a5a397f6d5dcb5/parse.py
function isdigit(str)
return str:match("^%d+$") ~= nil
end
function isalpha(str)
return str:match("^[A-Za-z]+$") ~= nil
end
assert(isdigit('8'))
assert(isdigit('234674'))
assert(not isdigit('A'))
assert(not isdigit('a'))
assert(not isdigit('.'))
assert(not isdigit(' '))
assert(not isdigit('24R3'))
assert(isalpha('A'))
assert(isalpha('a'))
assert(isalpha('aAfvfqsdAR'))
assert(isalpha('foo'))
assert(not isalpha('aAfv1qsdAR2'))
assert(not isalpha('1'))
assert(not isalpha('9'))
assert(not isalpha('+'))
assert(not isalpha(' '))
assert(not isalpha('.'))
function Token(kind, value)
return {
kind = kind,
value = value
}
end
function Node(value, op, left, right)
local node = {
value = value,
op = op,
left = left,
right = right,
}
node.isLeaf = function()
return node.value ~= nil
end
return node
end
function Lexer(text)
local index = 1
local current = text:sub(index, index)
local function advance()
index = index + 1
if index > #text then
current = nil
else
current = text:sub(index, index)
end
end
local function skip_whitespace()
while current ~= nil and current:sub(index, index) == ' ' do
self.advance()
end
end
local function number()
local digits = ''
-- we also keep isalpha in the while. It means there is a number then an alpha char. We then throw error message in this case
while (not current == nil) or (isdigit(current)) or (current == '.') or (isalpha(current)) do
-- error message if char inside number
if isalpha(current) then
error("lex error, char in number : "..current)
end
digits = digits..current
advance()
-- current can become nil here. But fonction will continue so we return if it-s the case
if current == nil then return tonumber(digits) end
end
return tonumber(digits)
end
local function string()
local letters = ''
-- we keep going if isdigit, because we allow identifier like foo123
while (not current == nil) or (isalpha(current)) or (isdigit(current)) do
letters = letters..current
advance()
if current == nil then return letters end
end
return letters
end
local function next_token()
while current ~= nil do
if current == ' ' then
advance()
goto continue
end
if isdigit(current) then
return Token('NUM', number())
end
if isalpha(current) then
return Token('STR', string())
end
if current == '*' then
advance()
return Token('MULT', '*')
end
if current == '/' then
advance()
return Token('DIV', '/')
end
if current == '+' then
advance()
return Token('PLUS', '+')
end
if current == '-' then
advance()
return Token('MINUS', '-')
end
if current == '(' then
advance()
return Token('LPAR', '(')
end
if current == ')' then
advance()
return Token('RPAR', ')')
end
error("invalid token : "..current)
::continue::
end
return Token("EOF", "EOF")
end
return {
next_token = next_token
}
end
function Parser(lexer)
local current_token = lexer.next_token()
function eat(token_kind)
if current_token.kind == token_kind then
current_token = lexer.next_token()
else
error("Invalid syntax")
end
end
function factor()
local token = current_token
if token.kind == 'NUM' then
eat('NUM')
return Node(token.value, nil, nil, nil)
elseif token.kind == 'LPAR' then
eat('LPAR')
local node = expr()
eat('RPAR')
return node
end
end
function term()
local node = factor()
while current_token.kind == 'MULT' or current_token.kind == 'DIV' do
local token = current_token
if token.kind == 'MULT' then
eat('MULT')
elseif token.kind == 'DIV' then
eat('DIV')
end
node = Node(nil, token.value, node, factor())
end
return node
end
function expr()
local node = term()
while current_token.kind == 'PLUS' or current_token.kind == 'MINUS' do
local token = current_token
if token.kind == 'PLUS' then
eat('PLUS')
elseif token.kind == 'MINUS' then
eat('MINUS')
end
node = Node(nil, token.value, node, term())
end
return node
end
function read()
return expr()
end
return {
eat = eat,
factor = factor,
term = term,
expr = expr,
read = read
}
end
function eval(node)
if node == nil then return end --careful here
if node.isLeaf() then return node.value end
local left_val = eval(node.left)
local right_val = eval(node.right)
if node.op == '+' then return left_val+right_val end
if node.op == '-' then return left_val-right_val end
if node.op == '*' then return left_val*right_val end
if node.op == '/' then return left_val/right_val end
error("unknown op "..node.op)
end
function test_lexer(program)
local lex = Lexer(program)
repeat
local token = lex.next_token()
print(token.kind.." "..token.value)
until (token.kind == "EOF")
end
test_lexer("jfam + 4577 * a24b +aaab41574")
local program = "(((5 + 3 * 2) * (9 - 4)) / (2 + 3)) + ((12 * 2) - (4 * 3)) + ((18 / 3 + 1) + (6 * 2)) - ((8 + 4 - 1 + 3) / (2 * 1))"
local lex = Lexer(program)
local parser = Parser(lex)
local node = parser.expr()
assert(eval(node) == 35)