-- Copyright 2006 by Wolfram Kahl, all rights reserved

module SImPLLexer where

import SImPLTokens
import Char

data Token
    = TokBinOp BinOp
    | TokUnOp  UnaryOp
    | TokType  Type
    | TokLit   Literal
    | TokIdent String
    | TokSep   Char
    | TokASSIGN
    | TokIF
    | TokTHEN
    | TokELSE
    | TokWHILE
    | TokDO
    | TokPROGRAM

lexSImPL :: String -> [Token]
lexSImPL [] = []
lexSImPL ('+'       : cs) = TokBinOp (MkArithOp Plus        ) : lexSImPL cs
lexSImPL ('-'       : cs) = TokBinOp (MkArithOp Minus       ) : lexSImPL cs
lexSImPL ('*'       : cs) = TokBinOp (MkArithOp Times       ) : lexSImPL cs
lexSImPL ('/'       : cs) = TokBinOp (MkArithOp Div         ) : lexSImPL cs
lexSImPL ('>' : '=' : cs) = TokBinOp (MkRelOp GreaterOrEqual) : lexSImPL cs
lexSImPL ('<' : '=' : cs) = TokBinOp (MkRelOp LessOrEqual   ) : lexSImPL cs
lexSImPL ('=' : '=' : cs) = TokBinOp (MkRelOp Equal         ) : lexSImPL cs
lexSImPL ('!' : '=' : cs) = TokBinOp (MkRelOp NotEqual      ) : lexSImPL cs
lexSImPL ('&' : '&' : cs) = TokBinOp (MkBoolOp And          ) : lexSImPL cs
lexSImPL ('|' : '|' : cs) = TokBinOp (MkBoolOp Or           ) : lexSImPL cs
lexSImPL ('>'       : cs) = TokBinOp (MkRelOp Greater       ) : lexSImPL cs
lexSImPL ('<'       : cs) = TokBinOp (MkRelOp Less          ) : lexSImPL cs
lexSImPL ('!'       : cs) = TokUnOp   Not                     : lexSImPL cs
lexSImPL (':' : '=' : cs) = TokASSIGN                         : lexSImPL cs

lexSImPL (c:cs)
  | isDigit c  =  lexNumber [c] cs
  | isAlpha c  =  lexIdent  [c] cs
  | isSep   c  =  TokSep c : lexSImPL cs
  | isSpace c  =  lexSImPL cs
  | otherwise  =  error ("lexSImPL: illegal character: " ++ take 20 (c:cs))

lexNumber :: String -> String -> [Token]
lexNumber prefix (c:cs) | isDigit c     =  lexNumber (prefix ++ [c]) cs
lexNumber prefix s                      =  TokLit (LitInt (read prefix)) : lexSImPL s

-- with identifiers, we have to check for keywords:
lexIdent :: String -> String -> [Token]
lexIdent  prefix (c:cs) | isAlphaNum c  =  lexIdent (prefix ++ [c]) cs
lexIdent  prefix s                      =  tok : lexSImPL s
  where
    tok = case prefix of
     "true"    -> TokLit (LitBool True)
     "false"   -> TokLit (LitBool False)
     "int"     -> TokType IntType
     "bool"    -> TokType BoolType
     "if"      -> TokIF
     "then"    -> TokTHEN
     "else"    -> TokELSE
     "while"   -> TokWHILE
     "do"      -> TokDO
     "program" -> TokPROGRAM
     _         -> TokIdent prefix

isSep c = c `elem` "(){};,"


--

instance Show Token where
  show (TokBinOp op) = show op
  show (TokUnOp op) = show op
  show (TokType ty) = show ty
  show (TokLit lit) = show lit
  show (TokIdent var) = var
  show (TokSep c) = [c]
  show TokASSIGN = ":="
  show TokIF = "if"
  show TokTHEN = "then"
  show TokELSE = "else"
  show TokWHILE = "while"
  show TokDO = "do"
  show TokPROGRAM = "program"
