1 {-# LANGUAGE CPP, ForeignFunctionInterface #-} 2 {-# OPTIONS_GHC -XUnliftedFFITypes -XMagicHash -XUnboxedTuples -XDeriveDataTypeable #-} 3 -- | 4 -- Module : Data.ByteString.Internal 5 -- License : BSD-style 6 -- Maintainer : Don Stewart <dons@galois.com> 7 -- Stability : experimental 8 -- Portability : portable 9 -- 10 -- A module containing semi-public 'ByteString' internals. This exposes 11 -- the 'ByteString' representation and low level construction functions. 12 -- Modules which extend the 'ByteString' system will need to use this module 13 -- while ideally most users will be able to make do with the public interface 14 -- modules. 15 -- 16 module Data.ByteString.Internal ( 17 18 -- * The @ByteString@ type and representation 19 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable 20 21 -- * Low level introduction and elimination 22 create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString 23 createAndTrim, -- :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString 24 createAndTrim', -- :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) 25 unsafeCreate, -- :: Int -> (Ptr Word8 -> IO ()) -> ByteString 26 mallocByteString, -- :: Int -> IO (ForeignPtr a) 27 28 -- * Conversion to and from ForeignPtrs 29 fromForeignPtr, -- :: ForeignPtr Word8 -> Int -> Int -> ByteString 30 toForeignPtr, -- :: ByteString -> (ForeignPtr Word8, Int, Int) 31 32 -- * Utilities 33 inlinePerformIO, -- :: IO a -> a 34 nullForeignPtr, -- :: ForeignPtr Word8 35 36 -- * Standard C Functions 37 c_strlen, -- :: CString -> IO CInt 38 c_free_finalizer, -- :: FunPtr (Ptr Word8 -> IO ()) 39 40 memchr, -- :: Ptr Word8 -> Word8 -> CSize -> IO Ptr Word8 41 memcmp, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt 42 memcpy, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () 43 memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) 44 45 -- * cbits functions 46 c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO () 47 c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO () 48 c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8 49 c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8 50 c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt 51 52 #if defined(__GLASGOW_HASKELL__) 53 -- * Internal GHC magic 54 memcpy_ptr_baoff, -- :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ()) 55 #endif 56 57 -- * Chars 58 w2c, c2w, isSpaceWord8, isSpaceChar8 59 60 ) where 61 62 import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) 63 import Foreign.Ptr (Ptr, FunPtr, plusPtr) 64 import Foreign.Storable (Storable(..)) 65 import Foreign.C.Types (CInt, CSize, CULong) 66 import Foreign.C.String (CString) 67 68 #ifndef __NHC__ 69 import Control.Exception (assert) 70 #endif 71 72 import Data.Char (ord) 73 import Data.Word (Word8) 74 75 #if defined(__GLASGOW_HASKELL__) 76 import Data.Generics (Data(..), Typeable(..)) 77 import GHC.Ptr (Ptr(..)) 78 import GHC.Base (realWorld#,unsafeChr) 79 import GHC.IOBase (IO(IO), RawBuffer) 80 #if __GLASGOW_HASKELL__ >= 608 81 import GHC.IOBase (unsafeDupablePerformIO) 82 #else 83 import GHC.IOBase (unsafePerformIO) 84 #endif 85 #else 86 import Data.Char (chr) 87 import System.IO.Unsafe (unsafePerformIO) 88 #endif 89 90 #if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR) 91 import GHC.ForeignPtr (mallocPlainForeignPtrBytes) 92 #else 93 import Foreign.ForeignPtr (mallocForeignPtrBytes) 94 #endif 95 96 #if __GLASGOW_HASKELL__>=605 97 import GHC.ForeignPtr (ForeignPtr(ForeignPtr)) 98 import GHC.Base (nullAddr#) 99 #else 100 import Foreign.Ptr (nullPtr) 101 #endif 102 103 #if __HUGS__ 104 import Hugs.ForeignPtr (newForeignPtr_) 105 #elif __GLASGOW_HASKELL__<=604 106 import Foreign.ForeignPtr (newForeignPtr_) 107 #endif 108 109 -- CFILES stuff is Hugs only 110 {-# CFILES cbits/fpstring.c #-} 111 112 -- An alternative to Control.Exception (assert) for nhc98 113 #ifdef __NHC__ 114 #define assert assertS "__FILE__ : __LINE__" 115 assertS :: String -> Bool -> a -> a 116 assertS _ True = id 117 assertS s False = error ("assertion failed at "++s) 118 #endif 119 120 -- ----------------------------------------------------------------------------- 121 -- 122 -- Useful macros, until we have bang patterns 123 -- 124 125 #define STRICT1(f) f a | a `seq` False = undefined 126 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined 127 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined 128 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined 129 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined 130 131 -- ----------------------------------------------------------------------------- 132 133 -- | A space-efficient representation of a Word8 vector, supporting many 134 -- efficient operations. A 'ByteString' contains 8-bit characters only. 135 -- 136 -- Instances of Eq, Ord, Read, Show, Data, Typeable 137 -- 138 data ByteString = PS {-# UNPACK #-} !(ForeignPtr Word8) -- payload 139 {-# UNPACK #-} !Int -- offset 140 {-# UNPACK #-} !Int -- length 141 142 #if defined(__GLASGOW_HASKELL__) 143 deriving (Data, Typeable) 144 #endif 145 146 instance Show ByteString where 147 showsPrec p ps r = showsPrec p (unpackWith w2c ps) r 148 149 instance Read ByteString where 150 readsPrec p str = [ (packWith c2w x, y) | (x, y) <- readsPrec p str ] 151 152 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function. 153 unpackWith :: (Word8 -> a) -> ByteString -> [a] 154 unpackWith _ (PS _ _ 0) = [] 155 unpackWith k (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> 156 go (p `plusPtr` s) (l - 1) [] 157 where 158 STRICT3(go) 159 go p 0 acc = peek p >>= \e -> return (k e : acc) 160 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (k e : acc) 161 {-# INLINE unpackWith #-} 162 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-} 163 164 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some 165 -- conversion function 166 packWith :: (a -> Word8) -> [a] -> ByteString 167 packWith k str = unsafeCreate (length str) $ \p -> go p str 168 where 169 STRICT2(go) 170 go _ [] = return () 171 go p (x:xs) = poke p (k x) >> go (p `plusPtr` 1) xs -- less space than pokeElemOff 172 {-# INLINE packWith #-} 173 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-} 174 175 ------------------------------------------------------------------------ 176 177 -- | The 0 pointer. Used to indicate the empty Bytestring. 178 nullForeignPtr :: ForeignPtr Word8 179 #if __GLASGOW_HASKELL__>=605 180 nullForeignPtr = ForeignPtr nullAddr# undefined --TODO: should ForeignPtrContents be strict? 181 #else 182 nullForeignPtr = unsafePerformIO $ newForeignPtr_ nullPtr 183 {-# NOINLINE nullForeignPtr #-} 184 #endif 185 186 -- --------------------------------------------------------------------- 187 -- Low level constructors 188 189 -- | /O(1)/ Build a ByteString from a ForeignPtr 190 fromForeignPtr :: ForeignPtr Word8 191 -> Int -- ^ Offset 192 -> Int -- ^ Length 193 -> ByteString 194 fromForeignPtr fp s l = PS fp s l 195 {-# INLINE fromForeignPtr #-} 196 197 -- | /O(1)/ Deconstruct a ForeignPtr from a ByteString 198 toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -- ^ (ptr, offset, length) 199 toForeignPtr (PS ps s l) = (ps, s, l) 200 {-# INLINE toForeignPtr #-} 201 202 -- | A way of creating ByteStrings outside the IO monad. The @Int@ 203 -- argument gives the final size of the ByteString. Unlike 204 -- 'createAndTrim' the ByteString is not reallocated if the final size 205 -- is less than the estimated size. 206 unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString 207 unsafeCreate l f = unsafeDupablePerformIO (create l f) 208 {-# INLINE unsafeCreate #-} 209 210 #if !defined(__GLASGOW_HASKELL__) || __GLASGOW_HASKELL__ < 608 211 -- for Hugs 212 unsafeDupablePerformIO :: IO a -> a 213 unsafeDupablePerformIO = unsafePerformIO 214 #endif 215 216 -- | Create ByteString of size @l@ and use action @f@ to fill it's contents. 217 create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString 218 create l f = do 219 fp <- mallocByteString l 220 withForeignPtr fp $ \p -> f p 221 return $! PS fp 0 l 222 {-# INLINE create #-} 223 224 -- | Given the maximum size needed and a function to make the contents 225 -- of a ByteString, createAndTrim makes the 'ByteString'. The generating 226 -- function is required to return the actual final size (<= the maximum 227 -- size), and the resulting byte array is realloced to this size. 228 -- 229 -- createAndTrim is the main mechanism for creating custom, efficient 230 -- ByteString functions, using Haskell or C functions to fill the space. 231 -- 232 createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString 233 createAndTrim l f = do 234 fp <- mallocByteString l 235 withForeignPtr fp $ \p -> do 236 l' <- f p 237 if assert (l' <= l) $ l' >= l 238 then return $! PS fp 0 l 239 else create l' $ \p' -> memcpy p' p (fromIntegral l') 240 {-# INLINE createAndTrim #-} 241 242 createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) 243 createAndTrim' l f = do 244 fp <- mallocByteString l 245 withForeignPtr fp $ \p -> do 246 (off, l', res) <- f p 247 if assert (l' <= l) $ l' >= l 248 then return $! (PS fp 0 l, res) 249 else do ps <- create l' $ \p' -> 250 memcpy p' (p `plusPtr` off) (fromIntegral l') 251 return $! (ps, res) 252 253 -- | Wrapper of mallocForeignPtrBytes with faster implementation 254 -- for GHC 6.5 builds newer than 06/06/06 255 mallocByteString :: Int -> IO (ForeignPtr a) 256 mallocByteString l = do 257 #if __GLASGOW_HASKELL__ >= 605 && !defined(SLOW_FOREIGN_PTR) 258 mallocPlainForeignPtrBytes l 259 #else 260 mallocForeignPtrBytes l 261 #endif 262 {-# INLINE mallocByteString #-} 263 264 ------------------------------------------------------------------------ 265 266 -- | Conversion between 'Word8' and 'Char'. Should compile to a no-op. 267 w2c :: Word8 -> Char 268 #if !defined(__GLASGOW_HASKELL__) 269 w2c = chr . fromIntegral 270 #else 271 w2c = unsafeChr . fromIntegral 272 #endif 273 {-# INLINE w2c #-} 274 275 -- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and 276 -- silently truncates to 8 bits Chars > '\255'. It is provided as 277 -- convenience for ByteString construction. 278 c2w :: Char -> Word8 279 c2w = fromIntegral . ord 280 {-# INLINE c2w #-} 281 282 -- | Selects words corresponding to white-space characters in the Latin-1 range 283 -- ordered by frequency. 284 isSpaceWord8 :: Word8 -> Bool 285 isSpaceWord8 w = 286 w == 0x20 || 287 w == 0x0A || -- LF, \n 288 w == 0x09 || -- HT, \t 289 w == 0x0C || -- FF, \f 290 w == 0x0D || -- CR, \r 291 w == 0x0B || -- VT, \v 292 w == 0xA0 -- spotted by QC.. 293 {-# INLINE isSpaceWord8 #-} 294 295 -- | Selects white-space characters in the Latin-1 range 296 isSpaceChar8 :: Char -> Bool 297 isSpaceChar8 c = 298 c == ' ' || 299 c == '\t' || 300 c == '\n' || 301 c == '\r' || 302 c == '\f' || 303 c == '\v' || 304 c == '\xa0' 305 {-# INLINE isSpaceChar8 #-} 306 307 ------------------------------------------------------------------------ 308 309 -- | Just like unsafePerformIO, but we inline it. Big performance gains as 310 -- it exposes lots of things to further inlining. /Very unsafe/. In 311 -- particular, you should do no memory allocation inside an 312 -- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@. 313 -- 314 {-# INLINE inlinePerformIO #-} 315 inlinePerformIO :: IO a -> a 316 #if defined(__GLASGOW_HASKELL__) 317 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r 318 #else 319 inlinePerformIO = unsafePerformIO 320 #endif 321 322 -- --------------------------------------------------------------------- 323 -- 324 -- Standard C functions 325 -- 326 327 foreign import ccall unsafe "string.h strlen" c_strlen 328 :: CString -> IO CSize 329 330 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer 331 :: FunPtr (Ptr Word8 -> IO ()) 332 333 foreign import ccall unsafe "string.h memchr" c_memchr 334 :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) 335 336 memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) 337 memchr p w s = c_memchr p (fromIntegral w) s 338 339 foreign import ccall unsafe "string.h memcmp" memcmp 340 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt 341 342 foreign import ccall unsafe "string.h memcpy" c_memcpy 343 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) 344 345 memcpy :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () 346 memcpy p q s = c_memcpy p q s >> return () 347 348 {- 349 foreign import ccall unsafe "string.h memmove" c_memmove 350 :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) 351 352 memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () 353 memmove p q s = do c_memmove p q s 354 return () 355 -} 356 357 foreign import ccall unsafe "string.h memset" c_memset 358 :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) 359 360 memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) 361 memset p w s = c_memset p (fromIntegral w) s 362 363 -- --------------------------------------------------------------------- 364 -- 365 -- Uses our C code 366 -- 367 368 foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse 369 :: Ptr Word8 -> Ptr Word8 -> CULong -> IO () 370 371 foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse 372 :: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO () 373 374 foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum 375 :: Ptr Word8 -> CULong -> IO Word8 376 377 foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum 378 :: Ptr Word8 -> CULong -> IO Word8 379 380 foreign import ccall unsafe "static fpstring.h fps_count" c_count 381 :: Ptr Word8 -> CULong -> Word8 -> IO CULong 382 383 -- --------------------------------------------------------------------- 384 -- Internal GHC Haskell magic 385 386 #if defined(__GLASGOW_HASKELL__) 387 foreign import ccall unsafe "__hscore_memcpy_src_off" 388 memcpy_ptr_baoff :: Ptr a -> RawBuffer -> CInt -> CSize -> IO (Ptr ()) 389 #endif