{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -XMagicHash #-}
-- |
-- Module      : Data.ByteString.Unsafe
-- License     : BSD-style
-- Maintainer  : dons@cse.unsw.edu.au, duncan@haskell.org
-- Stability   : experimental
-- Portability : portable
--
-- A module containing unsafe 'ByteString' operations: unchecked access
-- functions and low level conversions to and from C strings.
-- Modules which extend the 'ByteString' system may need to use this module
-- while ideally most users will be able to make do with the public interface
-- modules.
--
module Data.ByteString.Unsafe (

        -- * Unchecked access
        unsafeHead,             -- :: ByteString -> Word8
        unsafeTail,             -- :: ByteString -> ByteString
        unsafeIndex,            -- :: ByteString -> Int -> Word8
        unsafeTake,             -- :: Int -> ByteString -> ByteString
        unsafeDrop,             -- :: Int -> ByteString -> ByteString

        -- * Low level interaction with CStrings
        -- ** Using ByteStrings with functions for CStrings
        unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
        unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a

        -- ** Converting CStrings to ByteStrings
        unsafePackCString,      -- :: CString -> IO ByteString
        unsafePackCStringLen,   -- :: CStringLen -> IO ByteString
        unsafePackMallocCString,-- :: CString -> IO ByteString

#if defined(__GLASGOW_HASKELL__)
        unsafePackAddress,          -- :: Addr# -> IO ByteString
        unsafePackAddressLen,       -- :: Int -> Addr# -> IO ByteString
        unsafePackCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
        unsafeFinalize,             -- :: ByteString -> IO ()
#endif

  ) where

import Data.ByteString.Internal

import Foreign.ForeignPtr       (newForeignPtr_, newForeignPtr, withForeignPtr)
import Foreign.Ptr              (Ptr, plusPtr, castPtr)

import Foreign.Storable         (Storable(..))
import Foreign.C.String         (CString, CStringLen)

#ifndef __NHC__
import Control.Exception        (assert)
#endif

import Data.Word                (Word8)

#if defined(__GLASGOW_HASKELL__)
import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr)
import qualified Foreign.Concurrent as FC (newForeignPtr)

--import Data.Generics            (Data(..), Typeable(..))

import GHC.Prim                 (Addr#)
import GHC.Ptr                  (Ptr(..))
#endif

-- An alternative to Control.Exception (assert) for nhc98
#ifdef __NHC__
#define assert  assertS "__FILE__ : __LINE__"
assertS :: String -> Bool -> a -> a
assertS _ True  = id
assertS s False = error ("assertion failed at "++s)
#endif

-- -----------------------------------------------------------------------------
--
-- Useful macros, until we have bang patterns
--

#define STRICT1(f) f a | a `seq` False = undefined
#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined

-- ---------------------------------------------------------------------
--
-- Extensions to the basic interface
--

-- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
-- check for the empty case, so there is an obligation on the programmer
-- to provide a proof that the ByteString is non-empty.
unsafeHead :: ByteString -> Word8
unsafeHead (PS x s l) = assert (l > 0) $
    inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
{-# INLINE unsafeHead #-}

-- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
-- check for the empty case. As with 'unsafeHead', the programmer must
-- provide a separate proof that the ByteString is non-empty.
unsafeTail :: ByteString -> ByteString
unsafeTail (PS ps s l) = assert (l > 0) $ PS ps (s+1) (l-1)
{-# INLINE unsafeTail #-}

-- | Unsafe 'ByteString' index (subscript) operator, starting from 0,
-- returning a 'Word8'.
-- This omits the bounds check, which means there is an accompanying
-- obligation on the programmer to ensure the bounds are checked in some
-- other way.
unsafeIndex :: ByteString -> Int -> Word8
unsafeIndex (PS x s l) i = assert (i >= 0 && i < l) $
    inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
{-# INLINE unsafeIndex #-}

-- | A variety of 'take' which omits the checks on @n@ so there is an
-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
unsafeTake :: Int -> ByteString -> ByteString
unsafeTake n (PS x s l) = assert (0 <= n && n <= l) $ PS x s n
{-# INLINE unsafeTake #-}

-- | A variety of 'drop' which omits the checks on @n@ so there is an
-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
unsafeDrop  :: Int -> ByteString -> ByteString
unsafeDrop n (PS x s l) = assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
{-# INLINE unsafeDrop #-}


#if defined(__GLASGOW_HASKELL__)
-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
-- Addr\# (an arbitrary machine address assumed to point outside the
-- garbage-collected heap) into a @ByteString@. A much faster way to
-- create an Addr\# is with an unboxed string literal, than to pack a
-- boxed string. An unboxed string literal is compiled to a static @char
-- []@ by GHC. Establishing the length of the string requires a call to
-- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
-- is the case with "string"# literals in GHC). Use 'unsafePackAddressLen'
-- if you know the length of the string statically.
--
-- An example:
--
-- > literalFS = unsafePackAddress "literal"#
--
-- This function is /unsafe/. If you modify the buffer pointed to by the
-- original Addr# this modification will be reflected in the resulting
-- @ByteString@, breaking referential transparency.
--
-- Note this also won't work if your Addr# has embedded '\0' characters in
-- the string (strlen will fail).
--
unsafePackAddress :: Addr# -> IO ByteString
unsafePackAddress addr# = do
    p <- newForeignPtr_ (castPtr cstr)
    l <- c_strlen cstr
    return $ PS p 0 (fromIntegral l)
  where
    -- Pin the pointer to a single type and cast explicitly where the
    -- ForeignPtr is built, so that the two uses above do not depend on
    -- this local binding being generalised.
    cstr :: CString
    cstr = Ptr addr#
{-# INLINE unsafePackAddress #-}

-- | /O(1)/ 'unsafePackAddressLen' provides constant-time construction of
-- 'ByteString's, which is ideal for string literals. It packs a
-- sequence of bytes into a 'ByteString', given a raw Addr\# to the
-- string, and the length of the string.
--
-- This function is /unsafe/ in two ways:
--
-- * the length argument is assumed to be correct. If the length
-- argument is incorrect, it is possible to overstep the end of the
-- byte array.
--
-- * if the underlying Addr# is later modified, this change will be
-- reflected in resulting @ByteString@, breaking referential
-- transparency.
--
-- If in doubt, don't use these functions.
--
unsafePackAddressLen :: Int -> Addr# -> IO ByteString
unsafePackAddressLen len addr# = do
    p <- newForeignPtr_ (Ptr addr#)
    return $ PS p 0 len
{-# INLINE unsafePackAddressLen #-}

-- | /O(1)/ Construct a 'ByteString' given a Ptr Word8 to a buffer, a
-- length, and an IO action representing a finalizer. This function is
-- not available on Hugs.
--
-- This function is /unsafe/, it is possible to break referential
-- transparency by modifying the underlying buffer pointed to by the
-- first argument. Any changes to the original buffer will be reflected
-- in the resulting @ByteString@.
--
unsafePackCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
unsafePackCStringFinalizer p l f = do
    fp <- FC.newForeignPtr p f
    return $ PS fp 0 l

-- | Explicitly run the finaliser associated with a 'ByteString'.
-- References to this value after finalisation may generate invalid memory
-- references.
--
-- This function is /unsafe/, as there may be other
-- 'ByteString's referring to the same underlying pages. If you use
-- this, you need to have a proof of some kind that all 'ByteString's
-- ever generated from the underlying byte array are no longer live.
--
unsafeFinalize :: ByteString -> IO ()
unsafeFinalize (PS p _ _) = FC.finalizeForeignPtr p

#endif

------------------------------------------------------------------------
-- Packing CStrings into ByteStrings

-- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
-- finalizer associated to it, and will not be garbage collected by
-- Haskell. The ByteString length is calculated using /strlen(3)/,
-- and thus the complexity is /O(n)/.
--
-- This function is /unsafe/. If the @CString@ is later modified, this
-- change will be reflected in the resulting @ByteString@, breaking
-- referential transparency.
--
unsafePackCString :: CString -> IO ByteString
unsafePackCString cstr = do
    fp <- newForeignPtr_ (castPtr cstr)
    l <- c_strlen cstr
    return $! PS fp 0 (fromIntegral l)

-- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
-- have /no/ finalizer associated with it, and will not be garbage
-- collected by Haskell. This operation has /O(1)/ complexity as we
-- already know the final size, so no /strlen(3)/ is required.
--
-- This function is /unsafe/. If the original @CStringLen@ is later
-- modified, this change will be reflected in the resulting @ByteString@,
-- breaking referential transparency.
--
unsafePackCStringLen :: CStringLen -> IO ByteString
unsafePackCStringLen (ptr,len) = do
    fp <- newForeignPtr_ (castPtr ptr)
    -- The length component of a CStringLen is already an Int, so it is
    -- stored directly.
    return $! PS fp 0 len

-- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
-- have a @free(3)@ finalizer associated to it.
--
-- This function is /unsafe/. If the original @CString@ is later
-- modified, this change will be reflected in the resulting @ByteString@,
-- breaking referential transparency.
--
-- This function is also unsafe if you call its finalizer twice,
-- which will result in a /double free/ error, or if you pass it
-- a CString not allocated with 'malloc'.
--
unsafePackMallocCString :: CString -> IO ByteString
unsafePackMallocCString cstr = do
    fp <- newForeignPtr c_free_finalizer (castPtr cstr)
    len <- c_strlen cstr
    return $! PS fp 0 (fromIntegral len)

-- ---------------------------------------------------------------------

-- | /O(1) construction/ Use a @ByteString@ with a function requiring a
-- @CString@.
--
-- This function does zero copying, and merely unwraps a @ByteString@ to
-- appear as a @CString@. It is /unsafe/ in two ways:
--
-- * After calling this function the @CString@ shares the underlying
-- byte buffer with the original @ByteString@. Thus modifying the
-- @CString@, either in C, or using poke, will cause the contents of the
-- @ByteString@ to change, breaking referential transparency. Other
-- @ByteStrings@ created by sharing (such as those produced via 'take'
-- or 'drop') will also reflect these changes. To avoid this, use
-- @useAsCString@, which makes a copy of the original @ByteString@.
--
-- * @CStrings@ are often passed to functions that require them to be
-- null-terminated. If the original @ByteString@ wasn't null terminated,
-- neither will the @CString@ be. It is the programmer's responsibility
-- to guarantee that the @ByteString@ is indeed null terminated. If in
-- doubt, use @useAsCString@.
--
unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)

-- | /O(1) construction/ Use a @ByteString@ with a function requiring a
-- @CStringLen@.
--
-- This function does zero copying, and merely unwraps a @ByteString@ to
-- appear as a @CStringLen@. It is /unsafe/:
--
-- * After calling this function the @CStringLen@ shares the underlying
-- byte buffer with the original @ByteString@. Thus modifying the
-- @CStringLen@, either in C, or using poke, will cause the contents of the
-- @ByteString@ to change, breaking referential transparency. Other
-- @ByteStrings@ created by sharing (such as those produced via 'take'
-- or 'drop') will also reflect these changes. To avoid this, use
-- @useAsCStringLen@, which makes a copy of the original @ByteString@.
--
unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
unsafeUseAsCStringLen (PS ps s l) f = withForeignPtr ps $ \p -> f (castPtr p `plusPtr` s,l)