{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveTraversable #-}
{- |
Parse an Intrinsics file and generate a Haskell interface to every intrinsic.
This is currently only tested and used for IntrinsicsX86.td
and relies on the flat structure of IntrinsicsX86.td.
In contrast to that, IntrinsicsPowerPC.td uses custom classes
and thus cannot be processed by this program.

A safer way would be to invoke the llvm-tblgen utility in some way.

1. We could write some Haskell or C++ code,
   that queries the intrinsics from the include/llvm/Intrinsics.h interface.

2. We could write a custom variant of llvm-tblgen
   with a back-end that creates the Haskell interface for intrinsics.
   This can be written in C++ or
   we have to call the TableGen library functions from Haskell somehow.

3. We could ask llvm-tblgen for a list of all records and parse its output.
   This requires no C++ coding,
   but we rely on the output format of @-print-records@.

   > llvm-tblgen -I /usr/local/llvm-3.1/include /usr/local/llvm-3.1/include/llvm/Intrinsics.td -print-records
-}
module Main where

import qualified Text.ParserCombinators.Parsec.Token as T
import qualified Text.ParserCombinators.Parsec.Language as L
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec (CharParser, (<|>), )

import qualified Control.Monad.Trans.Writer as MW
import qualified Data.Map as M
import qualified Data.Set as S

import qualified Data.Traversable as Trav
import qualified Data.Foldable as Fold
import qualified Data.List.HT as ListHT
import qualified Data.List as List
import qualified Data.Char as Char
import Control.Monad (mzero, )
import Control.Functor.HT (void, )
import Data.Maybe (fromMaybe, )

import qualified System.IO as IO


data Intrinsic typ = Intrinsic Name Name (FunctionType typ)
   deriving (Show, Functor, Fold.Foldable, Trav.Traversable)

data FunctionType typ = FunctionType [typ] [typ]
   deriving (Show, Functor, Fold.Foldable, Trav.Traversable)

type Name = String

data QualName = QualName String String String

newtype LLVMType = LLVMType String
   deriving (Show)

newtype HaskellType = HaskellType {haskellTypeDecons :: String}
   deriving (Show, Eq, Ord)


gccBuiltinPrefix :: String
gccBuiltinPrefix = "__builtin_ia32_"

unsignedFunctions :: S.Set String
unsignedFunctions = S.fromList $ map (gccBuiltinPrefix++) $
   "packusdw128" :
   "packusdw256" :
   "packuswb128" :
   "packuswb256" :
   "paddusb128" :
   "paddusb256" :
   "paddusw128" :
   "paddusw256" :
   "phminposuw128" :
   "pmaddubsw128" :
   "pmaddubsw256" :
   "pmaxub128" :
   "pmaxub256" :
   "pmaxud128" :
   "pmaxud256" :
   "pmaxuw128" :
   "pmaxuw256" :
   "pminub128" :
   "pminub256" :
   "pminud128" :
   "pminud256" :
   "pminuw128" :
   "pminuw256" :
   "pmulhuw128" :
   "pmulhuw256" :
   "pmuludq128" :
   "pmuludq256" :
   "psubusb128" :
   "psubusb256" :
   "psubusw128" :
   "psubusw256" :
   "vphaddubd" :
   "vphaddubq" :
   "vphaddubw" :
   "vphaddudq" :
   "vphadduwd" :
   "vphadduwq" :
   -- it's only the flag set that is unsigned
   -- the floating point operands are always signed
   "roundps" :
   "roundpd" :
   "roundps256" :
   "roundpd256" :
   "roundss" :
   "roundsd" :
   "cmpps" :
   "cmppd" :
   "cmpps256" :
   "cmppd256" :
   "cmpss" :
   "cmpsd" :
   []

translateType ::
   Bool -> LLVMType ->
   MW.Writer (M.Map HaskellType HaskellType) HaskellType
translateType signed (LLVMType llvmTypeStr) =
   let formatQType (mqual, typ) =
          maybe "" (++".") mqual ++ typ
       returnType shortType longType = do
          MW.tell (M.singleton shortType longType)
          return shortType
       composedType = do
          vec <- Parsec.optionMaybe $ do
             void $ Parsec.char 'v'
             Parsec.many1 Parsec.digit
          prim <- Parsec.choice $
             (do void $ Parsec.char 'i'
                 fmap
                    (\n ->
                       if signed
                         then (Just "I", "Int"++n)
                         else (Just "W", "Word"++n)) $
                    Parsec.many1 Parsec.digit) :
             (do void $ Parsec.char 'f'
                 n <- Parsec.many1 Parsec.digit
                 case n of
                    "32" -> return (Nothing, "Float")
                    "64" -> return (Nothing, "Double")
                    _ -> return $ (Just "LLVM", "FP" ++ n)) :
             []
          return $
             case vec of
                Nothing -> return $ HaskellType $ "LLVM.Value " ++ formatQType prim
                Just d ->
                   returnType
                      (HaskellType $ "V" ++ d ++ snd prim)
                      (HaskellType $
                          "LLVM.Value (LLVM.Vector TypeNum.D" ++
                          d ++ " " ++ formatQType prim ++ ")")
       p = do
          void $ Parsec.string "llvm_"
          haskType <- Parsec.choice $
             (Parsec.string "x86mmx" >>
              return (returnType (HaskellType "MMX") (HaskellType "LLVM.Value (LLVM.Vector TypeNum.D8 W.Word8)"))) :
             (Parsec.string "ptr" >>
              return (return (HaskellType "LLVM.Value (Ptr ())"))) :
             composedType :
             []
          void $ Parsec.string "_ty"
          return haskType
   in  case Parsec.parse p "" llvmTypeStr of
          Left _msg ->
             let typeSyn = HaskellType $
                    case llvmTypeStr of
                       c:cs -> Char.toUpper c : cs
                       _ -> ""
             in do
                 MW.tell (M.singleton typeSyn (HaskellType "LLVM.Value ()"))
                 return typeSyn
          Right act -> act

splitName :: Name -> QualName
splitName name =
   let p = do
          void $ Parsec.string "int_"
          arch <- Parsec.many1 Parsec.alphaNum
          void $ Parsec.char '_'
          feature <- Parsec.many1 Parsec.alphaNum
          void $ Parsec.char '_'
          stem <- Parsec.many1 Parsec.anyChar
          return $ QualName arch feature stem
   in  case Parsec.parse p "" name of
          Left _msg -> QualName "" "" name
          Right qname -> qname

featureMap :: M.Map String String
featureMap = M.fromList $
   ("sse", "sse1") :
   ("aesni", "aes") :
   ("3dnow", "amd3dnow") :
   ("3dnowa", "amd3dnowa") :
   []

formatIntrinsicInHaskell :: Intrinsic HaskellType -> String
formatIntrinsicInHaskell
      (Intrinsic name gccblt (FunctionType parameters results)) =
   let (QualName _arch feature stem) = splitName name
       dotStem = map (\c -> case c of '_' -> '.'; _ -> c) stem
       haskName =
          fromMaybe gccblt $
          ListHT.maybePrefixOf gccBuiltinPrefix gccblt
       resultStr =
          if null results
            then "LLVM.Value ()"
            else List.intercalate ", " $ map haskellTypeDecons results
   in  unlines $
          (haskName ++ " :: Ext.T (" ++
           concatMap (\(HaskellType typ) -> typ ++ " -> ") parameters ++
           "LLVM.CodeGenFunction r (" ++ resultStr ++ "))") :
          (haskName ++ " = Ext.intrinsic ExtX86." ++
           M.findWithDefault feature feature featureMap ++
           " " ++ show dotStem) :
          []

convertIntrinsics :: [Intrinsic LLVMType] -> String
convertIntrinsics intrinsics =
   unlines $
   "{- Do not edit! This file was created with the PrepareIntrinsics tool. -}" :
   "module LLVM.Extra.Extension.X86Auto where" :
   "" :
   "import qualified LLVM.Extra.Extension as Ext" :
   "import qualified LLVM.Extra.ExtensionCheck.X86 as ExtX86" :
   "import qualified LLVM.Core as LLVM" :
   "import qualified Types.Data.Num as TypeNum" :
   "import qualified Data.Int as I" :
   "import qualified Data.Word as W" :
   "import Foreign.Ptr (Ptr, )" :
   "" :
   case MW.runWriter $
        mapM (\intr@(Intrinsic _ gccblt _) ->
                 Trav.traverse (translateType (not $ S.member gccblt unsignedFunctions)) intr) $
        filter (\(Intrinsic _ gccblt _) -> not $ null gccblt) intrinsics of
      (funcs, types) ->
         (map (\(HaskellType short, HaskellType long) ->
                  "type " ++ short ++ " = " ++ long) $
          M.toList types) ++
         "" :
         (map formatIntrinsicInHaskell funcs)

lexer :: T.TokenParser st
lexer =
   T.makeTokenParser $ L.emptyDef {
      L.commentStart = "/*",
      L.commentEnd = "*/",
      L.commentLine = "//",
      L.nestedComments = False,
      L.identStart = identifierStart,
      L.identLetter = identifierLetter,
      L.opStart = mzero,
      L.opLetter = mzero,
      L.caseSensitive = True,
      L.reservedNames = [ "let", "def", "in" ],
      L.reservedOpNames = [ "=", ":", "," ]
      }

identifierStart, identifierLetter :: CharParser st Char
identifierStart = Parsec.letter <|> Parsec.char '_'

identifierLetter =
   Parsec.alphaNum <|> Parsec.char '_' <|> Parsec.char '.'


gccBuiltin :: CharParser st String
gccBuiltin = do
   T.reserved lexer "GCCBuiltin"
   T.angles lexer $ T.stringLiteral lexer

llvmType :: CharParser st LLVMType
llvmType = fmap LLVMType $ T.identifier lexer

intrinsic :: CharParser st (FunctionType LLVMType)
intrinsic =
   Parsec.between (T.reserved lexer "Intrinsic") (T.semi lexer) $
         T.angles lexer $ do
      results <- T.brackets lexer $ T.commaSep lexer llvmType
      void $ T.comma lexer
      parameters <- T.brackets lexer $ T.commaSep lexer llvmType
      Parsec.optional $ do
         void $ T.comma lexer
         _attributes <- T.brackets lexer $ T.commaSep lexer $ T.identifier lexer
         return ()
      return $ FunctionType parameters results

letBlock :: CharParser st [Intrinsic LLVMType]
letBlock = do
   T.reserved lexer "let"
   T.reserved lexer "TargetPrefix"
   void $ T.symbol lexer "="
   _prefix <- T.stringLiteral lexer
   T.reserved lexer "in"
   T.braces lexer $ Parsec.many $ do
      T.reserved lexer "def"
      name <- T.identifier lexer
      void $ T.colon lexer
      gccblt <- Parsec.option "" $ do
         gccblt <- gccBuiltin
         void $ T.comma lexer
         return gccblt
      intr <- intrinsic
      return $ Intrinsic name gccblt intr

parser :: CharParser st [Intrinsic LLVMType]
parser =
   fmap concat $ Parsec.many1 letBlock

main :: IO ()
main = do
   parsed <-
      Parsec.parseFromFile (T.whiteSpace lexer >> parser)
         "/usr/local/llvm-3.1/include/llvm/IntrinsicsX86.td"
   case parsed of
      Left msg -> IO.hPutStrLn IO.stderr $ show msg
      Right intrinsics ->
         writeFile "src/LLVM/Extra/Extension/X86Auto.hs" $ convertIntrinsics intrinsics
