-- Parsec parser of tokenized Edoc XML (tokenization made by Tagsoup).

module Language.Edoc.Xml2Hs.Parser where

import Text.HTML.TagSoup
import Text.HTML.TagSoup.Parser
import Text.HTML.TagSoup.Match
import Text.HTML.TagSoup.Type
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos

import Language.Edoc.Xml2Hs.Type

data PState = PState {
  counter :: Int                       -- unique numbers producer
}

-- Token primitives to distinguish between opening and closing tags.

-- Match an opening tag.

openTag s = token show (\_ -> newPos "" 0 0) (\tag -> 
  case tag of
    TagOpen ts _ -> if ts == s then Just tag else Nothing
    _ -> Nothing)

expTag s = openTag s <?> s

-- Match a closing tag.

closeTag s = token show (\_ -> newPos "" 0 0) (\tag ->
  case tag of
    TagClose ts -> if ts == s then Just tag else Nothing
    _ -> Nothing)

-- Get a text element in case it is not ignored.

getText = token show (\_ -> newPos "" 0 0) (\tag ->
  case tag of
    TagText txt -> Just txt
    _ -> Nothing)


-- Ignore these tags. Anything else (like TagText) is also ignored.
-- This parser succeeds when a token HAS to be ignored.

ignore = token show (\_ -> newPos "" 0 0) (\tag ->
  let ignored = ["?xml", "package", "modules", "description", "briefDescription",
                 "fullDescription", "copyright", "version", "since", "deprecated",
                 "see", "reference", "todo", "behaviour", "callbacks", "callback",
                 "expr", "author", "p"]
  in  case tag of
        TagOpen s _ | s `elem` ignored -> Just ()
        TagOpen _ _ -> Nothing
        TagClose s | s `elem` ignored -> Just ()
        TagClose _ -> Nothing
        _ -> Just ())

-- Parse a whole module definition.

pDocMod :: GenParser Tag PState EDocMod

pDocMod = do
  skipMany ignore
  edm <- expTag "module"
  ags <- many pArgs
  skipMany ignore
  tds <- many pTypeDecls
  fds <- pFunctions
  closeTag "module"
  return $ EDocMod {
    em_name = fromAttrib "name" edm
   ,em_args = concat ags
   ,em_tdefs = concat tds
   ,em_funcs = fds}
  
-- Parse arguments.

pArgs = do
  expTag "args"
  as <- many pArg
  closeTag "args"
  return as

pArg = do
  expTag "arg"
  an <- pArgName
  manyTill anyToken (closeTag "arg")
  return an

pArgName = do
  expTag "argName"
  tx <- getText
  closeTag "argName"
  return tx

-- Parse function declarations.

pFunctions = do
  expTag "functions"
  fds <- many1 pFunDecl
  closeTag "functions"
  return fds

pFunDecl = do
  fn <- expTag "function"
  ats <- pArgs
  tss <- many pTypeSpec
  manyTill anyToken (closeTag "function")
  return $ EFunDecl {
    ef_name = fromAttrib "name" fn
   ,ef_arity = read $ fromAttrib "arity" fn
   ,ef_args = ats
   ,ef_tspec = tss}

-- Parse type declarations.

pTypeDecls = do
  expTag "typedecls"
  tds <- many1 pTypeDecl
  closeTag "typedecls"
  return tds

-- Parse a single toplevel type declaration which is a TypeDef wrapped into
-- a TypeDecl.

pTypeDecl = do
  expTag "typedecl"
  td <- pTypeDef
  skipMany ignore
  closeTag "typedecl"
  return td

-- Parse a local definition.

pLocDef = do
  expTag "localdef"
  tta <- pTypevar <|> pAbstype
  t <- pErlType
  closeTag "localdef"
  return $ ELocDef tta t

-- Parse a type definition.

pTypeDef = do
  expTag "typedef"
  en <- pErlName
  ats <- pArgTypes
  t1 <- many pErlType
  lds <- many pLocDef
  closeTag "typedef"
  return $ ETypeDef {
    td_ename = en
   ,td_argtypes = ats
   ,td_type = if null t1 then ENothing else head t1
   ,td_ldef = lds}

-- Parse a type specification

pTypeSpec = do
  expTag "typespec"
  en <- pErlName
  t1 <- many pErlType
  lds <- many pLocDef
  closeTag "typespec"
  return $ ETypeSpec {
    ts_ename = en
   ,ts_type = if null t1 then ENothing else head t1
   ,ts_ldef = lds}

-- Parse an Erlang Name.

pErlName = do
  enm <- expTag "erlangName"
  let app = fromAttrib "app" enm
      mod = fromAttrib "module" enm
      nam = fromAttrib "name" enm
  closeTag "erlangName"
  return $ ErlName {en_app = app, en_mod = mod, en_name = nam}

-- Parse argtypes.

pArgTypes = try $ do
  expTag "argtypes"
  ats <- many pErlType
  closeTag "argtypes"
  return ats


-- Parse a single type token. It may be either a constant or a type.

pErlType = pType <|> pConst

pConst = try $ do
  expTag "const"
  c <- pInteger <|> pFloat <|> pAtom
  closeTag "const"
  return c
  

pType = try $ do 
  expTag "type"
  t <- simples <|> pTypevar <|> pAtom <|> pInteger <|> pFloat <|> pRange
   <|> pList <|> pTuple <|> pFun <|> pRecord <|> pRecRef <|> pAbstype <|> pUnion
  closeTag "type"
  return t

-- Possible variants of Erlang types.

pSimple (t, c) = try $ do
  expTag t
  closeTag t
  return c

abstype s = EAbsType (ErlName "" "" s) []

simples = foldr1 (<|>) $ map pSimple [
  ("any", EAny), ("nil", ENil), ("pid", abstype "pid"), ("port", abstype "port"),
  ("maybe_improper_list", ENil), ("nonempty_maybe_improper_list", ENil),
  ("string", abstype "string"), ("nonempty_string", abstype "string"),
  ("no_return", ENothing), ("number", ENum), ("bool", abstype "bool"),
  ("integer", abstype "integer"), ("float", abstype "float"),
  ("pos_integer", abstype "integer"), ("non_neg_integer", abstype "integer")]
 
pUnion = try $ do
  expTag "union"
  uns <- many1 pErlType
  closeTag "union"
  return $ EUnion uns

pTypevar = try $ do
  v <- expTag "typevar"
  let n = fromAttrib "name" v
  closeTag "typevar"
  return $ ETypeVar n
  
pAtom = try $ do
  a <- expTag "atom"
  let v = fromAttrib "value" a
  closeTag "atom"
  return $ EAtom v

pInteger = try $ do
  i <- expTag "integer"
  let n = read $ fromAttrib "value" i
  closeTag "integer"
  return $ EInteger n
 
pFloat = try $ do
  f <- expTag "float"
  let v = read $ fromAttrib "value" f
  closeTag "float"
  return $ EFloat v

-- Several tags open a list. It should close with the same tag.
 
pList = try $ do
  TagOpen tag _ <- foldr1 (<|>) $ map expTag ["list", "nonempty_list"]
  t <- pErlType
  closeTag tag
  return $ EList t

-- Range type indeed corresponds an integer type; values
-- themselves are ignored.

pRange = try $ do
  expTag "range"
  i1 <- pConst
  i2 <- pConst
  closeTag "range"
  return $ EInteger 0

pTuple = try $ do
  expTag "tuple"
  ts <- many pErlType
  closeTag "tuple"
  return $ ETuple ts

pFun = try $ do
  expTag "fun"
  as <- pArgTypes
  t <- pErlType
  closeTag "fun"
  return $ EFun as t

pRecRef = try $ do
  expTag "recref"
  EAtom a <- pConst
  closeTag "recref"
  return $ ERecord a []

pRecord = try $ do
  expTag "record"
  EAtom a <- pAtom
  fs <- many pField
  closeTag "record"
  return $ ERecord a fs

pField = try $ do
  expTag "field"
  EAtom a <- pAtom
  t <- pErlType
  closeTag "field"
  return (a, t)

pAbstype = try $ do
  expTag "abstype"
  en <- pErlName
  ts <- many pErlType
  closeTag "abstype"
  return $ EAbsType en ts



