1 {-# LANGUAGE CPP, ForeignFunctionInterface #-}
    2 {-# OPTIONS_GHC -XUnliftedFFITypes -XMagicHash -XUnboxedTuples -XDeriveDataTypeable #-}
    3 -- |
    4 -- Module      : Data.ByteString.Internal
    5 -- License     : BSD-style
    6 -- Maintainer  : Don Stewart <dons@galois.com>
    7 -- Stability   : experimental
    8 -- Portability : portable
    9 -- 
   10 -- A module containing semi-public 'ByteString' internals. This exposes
   11 -- the 'ByteString' representation and low level construction functions.
   12 -- Modules which extend the 'ByteString' system will need to use this module
   13 -- while ideally most users will be able to make do with the public interface
   14 -- modules.
   15 --
   16 module Data.ByteString.Internal (
   17 
   18         -- * The @ByteString@ type and representation
   19         ByteString(..),         -- instances: Eq, Ord, Show, Read, Data, Typeable
   20 
   21         -- * Low level introduction and elimination
   22         create,                 -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
   23         createAndTrim,          -- :: Int -> (Ptr Word8 -> IO Int) -> IO  ByteString
   24         createAndTrim',         -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
   25         unsafeCreate,           -- :: Int -> (Ptr Word8 -> IO ()) ->  ByteString
   26         mallocByteString,       -- :: Int -> IO (ForeignPtr a)
   27 
   28         -- * Conversion to and from ForeignPtrs
   29         fromForeignPtr,         -- :: ForeignPtr Word8 -> Int -> Int -> ByteString
   30         toForeignPtr,           -- :: ByteString -> (ForeignPtr Word8, Int, Int)
   31 
   32         -- * Utilities
   33         inlinePerformIO,        -- :: IO a -> a
   34         nullForeignPtr,         -- :: ForeignPtr Word8
   35 
   36         -- * Standard C Functions
   37         c_strlen,               -- :: CString -> IO CInt
   38         c_free_finalizer,       -- :: FunPtr (Ptr Word8 -> IO ())
   39 
   40         memchr,                 -- :: Ptr Word8 -> Word8 -> CSize -> IO Ptr Word8
   41         memcmp,                 -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
   42         memcpy,                 -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
   43         memset,                 -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
   44 
   45         -- * cbits functions
   46         c_reverse,              -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
   47         c_intersperse,          -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
   48         c_maximum,              -- :: Ptr Word8 -> CInt -> IO Word8
   49         c_minimum,              -- :: Ptr Word8 -> CInt -> IO Word8
   50         c_count,                -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt
   51 
   52 #if defined(__GLASGOW_HASKELL__)
   53         -- * Internal GHC magic
   54         memcpy_ptr_baoff,       -- :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
   55 #endif
   56 
   57         -- * Chars
   58         w2c, c2w, isSpaceWord8, isSpaceChar8
   59 
   60   ) where
   61 
   62 import Foreign.ForeignPtr       (ForeignPtr, withForeignPtr)
   63 import Foreign.Ptr              (Ptr, FunPtr, plusPtr)
   64 import Foreign.Storable         (Storable(..))
   65 import Foreign.C.Types          (CInt, CSize, CULong)
   66 import Foreign.C.String         (CString)
   67 
   68 #ifndef __NHC__
   69 import Control.Exception        (assert)
   70 #endif
   71 
   72 import Data.Char                (ord)
   73 import Data.Word                (Word8)
   74 
   75 #if defined(__GLASGOW_HASKELL__)
   76 import Data.Generics            (Data(..), Typeable(..))
   77 import GHC.Ptr                  (Ptr(..))
   78 import GHC.Base                 (realWorld#,unsafeChr)
   79 import GHC.IOBase               (IO(IO), RawBuffer)
   80 #if __GLASGOW_HASKELL__ >= 608
   81 import GHC.IOBase               (unsafeDupablePerformIO)
   82 #else
   83 import GHC.IOBase               (unsafePerformIO)
   84 #endif
   85 #else
   86 import Data.Char                (chr)
   87 import System.IO.Unsafe         (unsafePerformIO)
   88 #endif
   89 
   90 #if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR)
   91 import GHC.ForeignPtr           (mallocPlainForeignPtrBytes)
   92 #else
   93 import Foreign.ForeignPtr       (mallocForeignPtrBytes)
   94 #endif
   95 
   96 #if __GLASGOW_HASKELL__>=605
   97 import GHC.ForeignPtr           (ForeignPtr(ForeignPtr))
   98 import GHC.Base                 (nullAddr#)
   99 #else
  100 import Foreign.Ptr              (nullPtr)
  101 #endif
  102 
  103 #if __HUGS__
  104 import Hugs.ForeignPtr          (newForeignPtr_)
  105 #elif __GLASGOW_HASKELL__<=604
  106 import Foreign.ForeignPtr       (newForeignPtr_)
  107 #endif
  108 
  109 -- CFILES stuff is Hugs only
  110 {-# CFILES cbits/fpstring.c #-}
  111 
  112 -- An alternative to Control.Exception (assert) for nhc98
  113 #ifdef __NHC__
  114 #define assert  assertS "__FILE__ : __LINE__"
  115 assertS :: String -> Bool -> a -> a
  116 assertS _ True  = id
  117 assertS s False = error ("assertion failed at "++s)
  118 #endif
  119 
  120 -- -----------------------------------------------------------------------------
  121 --
  122 -- Useful macros, until we have bang patterns
  123 --
  124 
  125 #define STRICT1(f) f a | a `seq` False = undefined
  126 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
  127 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
  128 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
  129 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
  130 
  131 -- -----------------------------------------------------------------------------
  132 
  133 -- | A space-efficient representation of a Word8 vector, supporting many
  134 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
  135 --
  136 -- Instances of Eq, Ord, Read, Show, Data, Typeable
  137 --
  138 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) -- payload
  139                      {-# UNPACK #-} !Int                -- offset
  140                      {-# UNPACK #-} !Int                -- length
  141 
  142 #if defined(__GLASGOW_HASKELL__)
  143     deriving (Data, Typeable)
  144 #endif
  145 
  146 instance Show ByteString where
  147     showsPrec p ps r = showsPrec p (unpackWith w2c ps) r
  148 
  149 instance Read ByteString where
  150     readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ]
  151 
  152 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
  153 unpackWith :: (Word8 -> a) -> ByteString -> [a]
  154 unpackWith _ (PS _  _ 0) = []
  155 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
  156         go (p `plusPtr` s) (l - 1) []
  157     where
  158         STRICT3(go)                            
  159         go p 0 acc = peek p          >>= \e -> return (k e : acc)
  160         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc)
  161 {-# INLINE unpackWith #-}
  162 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
  163 
  164 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
  165 -- conversion function
  166 packWith :: (a -> Word8) -> [a] -> ByteString
  167 packWith k str = unsafeCreate (length str) $ \p -> go p str
  168     where
  169         STRICT2(go)                  
  170         go _ []     = return ()
  171         go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
  172 {-# INLINE packWith #-}
  173 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
  174 
  175 ------------------------------------------------------------------------
  176 
  177 -- | The 0 pointer. Used to indicate the empty Bytestring.
  178 nullForeignPtr :: ForeignPtr Word8
  179 #if __GLASGOW_HASKELL__>=605
  180 nullForeignPtr = ForeignPtr nullAddr# undefined --TODO: should ForeignPtrContents be strict?
  181 #else
  182 nullForeignPtr = unsafePerformIO $ newForeignPtr_ nullPtr
  183 {-# NOINLINE nullForeignPtr #-}
  184 #endif
  185 
  186 -- ---------------------------------------------------------------------
  187 -- Low level constructors
  188 
  189 -- | /O(1)/ Build a ByteString from a ForeignPtr
  190 fromForeignPtr :: ForeignPtr Word8
  191                -> Int -- ^ Offset
  192                -> Int -- ^ Length
  193                -> ByteString
  194 fromForeignPtr fp s l = PS fp s l
  195 {-# INLINE fromForeignPtr #-}
  196 
  197 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
  198 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -- ^ (ptr, offset, length)
  199 toForeignPtr (PS ps s l) = (ps, s, l)
  200 {-# INLINE toForeignPtr #-}
  201 
  202 -- | A way of creating ByteStrings outside the IO monad. The @Int@
  203 -- argument gives the final size of the ByteString. Unlike
  204 -- 'createAndTrim' the ByteString is not reallocated if the final size
  205 -- is less than the estimated size.
  206 unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString
  207 unsafeCreate l f = unsafeDupablePerformIO (create l f)
  208 {-# INLINE unsafeCreate #-}
  209 
  210 #if !defined(__GLASGOW_HASKELL__) || __GLASGOW_HASKELL__ < 608
  211 -- for Hugs   
  212 unsafeDupablePerformIO :: IO a -> a
  213 unsafeDupablePerformIO = unsafePerformIO
  214 #endif
  215 
  216 -- | Create ByteString of size @l@ and use action @f@ to fill it's contents.
  217 create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
  218 create l f = do
  219     fp <- mallocByteString l
  220     withForeignPtr fp $ \p -> f p
  221     return $! PS fp 0 l
  222 {-# INLINE create #-}
  223 
  224 -- | Given the maximum size needed and a function to make the contents
  225 -- of a ByteString, createAndTrim makes the 'ByteString'. The generating
  226 -- function is required to return the actual final size (<= the maximum
  227 -- size), and the resulting byte array is realloced to this size.
  228 --
  229 -- createAndTrim is the main mechanism for creating custom, efficient
  230 -- ByteString functions, using Haskell or C functions to fill the space.
  231 --
  232 createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
  233 createAndTrim l f = do
  234     fp <- mallocByteString l
  235     withForeignPtr fp $ \p -> do
  236         l' <- f p
  237         if assert (l' <= l) $ l' >= l
  238             then return $! PS fp 0 l
  239             else create l' $ \p' -> memcpy p' p (fromIntegral l')
  240 {-# INLINE createAndTrim #-}
  241 
  242 createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
  243 createAndTrim' l f = do
  244     fp <- mallocByteString l
  245     withForeignPtr fp $ \p -> do
  246         (off, l', res) <- f p
  247         if assert (l' <= l) $ l' >= l
  248             then return $! (PS fp 0 l, res)
  249             else do ps <- create l' $ \p' ->
  250                             memcpy p' (p `plusPtr` off) (fromIntegral l')
  251                     return $! (ps, res)
  252 
  253 -- | Wrapper of mallocForeignPtrBytes with faster implementation
  254 -- for GHC 6.5 builds newer than 06/06/06
  255 mallocByteString :: Int -> IO (ForeignPtr a)
  256 mallocByteString l = do
  257 #if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR)
  258     mallocPlainForeignPtrBytes l
  259 #else
  260     mallocForeignPtrBytes l
  261 #endif
  262 {-# INLINE mallocByteString #-}
  263 
  264 ------------------------------------------------------------------------
  265 
  266 -- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
  267 w2c :: Word8 -> Char
  268 #if !defined(__GLASGOW_HASKELL__)
  269 w2c = chr . fromIntegral
  270 #else
  271 w2c = unsafeChr . fromIntegral
  272 #endif
  273 {-# INLINE w2c #-}
  274 
  275 -- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
  276 -- silently truncates to 8 bits Chars > '\255'. It is provided as
  277 -- convenience for ByteString construction.
  278 c2w :: Char -> Word8
  279 c2w = fromIntegral . ord
  280 {-# INLINE c2w #-}
  281 
  282 -- | Selects words corresponding to white-space characters in the Latin-1 range
  283 -- ordered by frequency. 
  284 isSpaceWord8 :: Word8 -> Bool
  285 isSpaceWord8 w =
  286     w == 0x20 ||
  287     w == 0x0A || -- LF, \n
  288     w == 0x09 || -- HT, \t
  289     w == 0x0C || -- FF, \f
  290     w == 0x0D || -- CR, \r
  291     w == 0x0B || -- VT, \v
  292     w == 0xA0    -- spotted by QC..
  293 {-# INLINE isSpaceWord8 #-}
  294 
  295 -- | Selects white-space characters in the Latin-1 range
  296 isSpaceChar8 :: Char -> Bool
  297 isSpaceChar8 c =
  298     c == ' '     ||
  299     c == '\t'    ||
  300     c == '\n'    ||
  301     c == '\r'    ||
  302     c == '\f'    ||
  303     c == '\v'    ||
  304     c == '\xa0'
  305 {-# INLINE isSpaceChar8 #-}
  306 
  307 ------------------------------------------------------------------------
  308 
  309 -- | Just like unsafePerformIO, but we inline it. Big performance gains as
  310 -- it exposes lots of things to further inlining. /Very unsafe/. In
  311 -- particular, you should do no memory allocation inside an
  312 -- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
  313 --
  314 {-# INLINE inlinePerformIO #-}
  315 inlinePerformIO :: IO a -> a
  316 #if defined(__GLASGOW_HASKELL__)
  317 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
  318 #else
  319 inlinePerformIO = unsafePerformIO
  320 #endif
  321 
  322 -- ---------------------------------------------------------------------
  323 -- 
  324 -- Standard C functions
  325 --
  326 
  327 foreign import ccall unsafe "string.h strlen" c_strlen
  328     :: CString -> IO CSize
  329 
  330 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
  331     :: FunPtr (Ptr Word8 -> IO ())
  332 
  333 foreign import ccall unsafe "string.h memchr" c_memchr
  334     :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
  335 
  336 memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
  337 memchr p w s = c_memchr p (fromIntegral w) s
  338 
  339 foreign import ccall unsafe "string.h memcmp" memcmp
  340     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
  341 
  342 foreign import ccall unsafe "string.h memcpy" c_memcpy
  343     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
  344 
  345 memcpy :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
  346 memcpy p q s = c_memcpy p q s >> return ()
  347 
  348 {-
  349 foreign import ccall unsafe "string.h memmove" c_memmove
  350     :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
  351 
  352 memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
  353 memmove p q s = do c_memmove p q s
  354                    return ()
  355 -}
  356 
  357 foreign import ccall unsafe "string.h memset" c_memset
  358     :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
  359 
  360 memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
  361 memset p w s = c_memset p (fromIntegral w) s
  362 
  363 -- ---------------------------------------------------------------------
  364 --
  365 -- Uses our C code
  366 --
  367 
  368 foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
  369     :: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
  370 
  371 foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
  372     :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
  373 
  374 foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
  375     :: Ptr Word8 -> CULong -> IO Word8
  376 
  377 foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
  378     :: Ptr Word8 -> CULong -> IO Word8
  379 
  380 foreign import ccall unsafe "static fpstring.h fps_count" c_count
  381     :: Ptr Word8 -> CULong -> Word8 -> IO CULong
  382 
  383 -- ---------------------------------------------------------------------
  384 -- Internal GHC Haskell magic
  385 
  386 #if defined(__GLASGOW_HASKELL__)
  387 foreign import ccall unsafe "__hscore_memcpy_src_off"
  388    memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ())
  389 #endif