m-chrzan.xyz
aboutsummaryrefslogtreecommitdiff
path: root/src/lexer.js
blob: fd0d0a34da122f4dbcd3826565dfcefbe42ba081 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
// Registry of every known lexeme type, in the order they were registered.
// Each entry has the shape { type, regex, adder } built by newLexemeType.
const lexemeTypes = []

/**
 * Registers a lexeme type in `lexemeTypes`.
 *
 * The pattern is compiled into a regex anchored at both ends with two capture
 * groups: group 1 is the text of the lexeme itself, group 2 is all of the
 * input that follows it.
 *
 * @param {string} type - Name stored on the registry entry and, with the
 *   default adder, on every lexeme emitted for this type.
 * @param {string} regex - Regex source for the lexeme. It is interpolated
 *   into a larger pattern, so backslashes must be escaped in the string.
 * @param {Function} [adder] - Callback stored on the entry; the lexer calls
 *   it with the output array and the matched text. When omitted (or falsy)
 *   a default is used that pushes `{ type }`.
 */
const newLexemeType = (type, regex, adder) => {
  lexemeTypes.push({
    type,
    // The remainder is captured with [\s\S]* rather than .* because `.` does
    // not match line terminators; with .* any input that still contains a
    // line break after this lexeme could never match.
    regex: new RegExp(`^(${regex})([\\s\\S]*)$`),
    adder: adder || (lexemes => { lexemes.push({ type }) })
  })
}

/**
 * Registers a lexeme type whose lexemes carry a value taken from the matched
 * text.
 *
 * @param {string} type - Lexeme type name, forwarded to newLexemeType.
 * @param {string} regex - Regex source, forwarded to newLexemeType.
 * @param {Function} [converter] - Maps the matched text to the stored value;
 *   defaults to the identity function, i.e. the raw matched string.
 */
const newValueLexeme = (type, regex, converter = v => v) => {
  // Adder that records both the type and the converted matched text.
  const appendWithValue = (lexemes, value) => {
    const lexeme = { type, value: converter(value) }
    lexemes.push(lexeme)
  }

  newLexemeType(type, regex, appendWithValue)
}

/**
 * Registers a lexeme type that is matched and consumed but produces no
 * lexeme in the output.
 *
 * @param {string} type - Lexeme type name, forwarded to newLexemeType.
 * @param {string} regex - Regex source, forwarded to newLexemeType.
 */
const newSkippableLexeme = (type, regex) => {
  // Adder that deliberately appends nothing.
  const discard = () => {}

  newLexemeType(type, regex, discard)
}

// Lexeme table. Entries are registered in this order.

// Runs of digits become 'constant' lexemes whose value is converted with Number.
newValueLexeme('constant', '\\d+', Number)

// Fixed-text lexemes that carry no value: [type, regex source].
// '+', '(' and ')' are regex metacharacters, hence the escaping.
const plainLexemes = [
  ['d', 'd'],
  ['+', '\\+'],
  ['-', '-'],
  ['(', '\\('],
  [')', '\\)'],
  ['E', 'E'],
  ['K', 'K']
]
for (const [type, regex] of plainLexemes) {
  newLexemeType(type, regex)
}

// Whitespace is consumed without emitting a lexeme.
newSkippableLexeme('whitespace', '\\s+')

/**
 * Splits an expression string into a flat list of lexemes.
 *
 * At each position the entries of `lexemeTypes` are tried in array order and
 * the first one whose regex matches the front of the remaining input wins.
 * Its adder decides what, if anything, is appended to the output, and the
 * scan then restarts from the head of the list on the rest of the input.
 *
 * @param {string} expressionString - The text to tokenize.
 * @returns {Array<Object>} The lexemes appended by the adders, in input order.
 * @throws {Error} 'Syntax error: unrecognized token' when no entry of
 *   `lexemeTypes` matches the remaining input.
 */
const lex = (expressionString) => {
  const lexemes = []
  // Work on a local cursor instead of reassigning the parameter.
  let remaining = expressionString

  while (remaining.length > 0) {
    let matched = false

    for (const lexemeType of lexemeTypes) {
      const matches = lexemeType.regex.exec(remaining)

      if (matches) {
        matched = true
        lexemeType.adder(lexemes, matches[1])
        // Group 2 of every lexeme regex is the input left after the lexeme.
        remaining = matches[2]
        // Stop here so the next position is again tried against the whole
        // list from the top; otherwise a later entry could claim text that
        // an earlier entry should have had priority on.
        break
      }
    }

    if (!matched) {
      throw new Error('Syntax error: unrecognized token')
    }
  }

  return lexemes
}

// Expose `lex` on this module's exports object.
exports.lex = lex