1 {-# LANGUAGE CPP #-}
    2 {-# OPTIONS_GHC -XMagicHash #-}
    3 -- |
    4 -- Module      : Data.ByteString.Unsafe
    5 -- License     : BSD-style
    6 -- Maintainer  : dons@cse.unsw.edu.au, duncan@haskell.org
    7 -- Stability   : experimental
    8 -- Portability : portable
    9 -- 
   10 -- A module containing unsafe 'ByteString' operations. This exposes
   11 -- the 'ByteString' representation and low level construction functions.
   12 -- Modules which extend the 'ByteString' system will need to use this module
   13 -- while ideally most users will be able to make do with the public interface
   14 -- modules.
   15 --
   16 module Data.ByteString.Unsafe (
   17 
   18         -- * Unchecked access
   19         unsafeHead,             -- :: ByteString -> Word8
   20         unsafeTail,             -- :: ByteString -> ByteString
   21         unsafeIndex,            -- :: ByteString -> Int -> Word8
   22         unsafeTake,             -- :: Int -> ByteString -> ByteString
   23         unsafeDrop,             -- :: Int -> ByteString -> ByteString
   24 
   25         -- * Low level interaction with CStrings
   26         -- ** Using ByteStrings with functions for CStrings
   27         unsafeUseAsCString,     -- :: ByteString -> (CString -> IO a) -> IO a
   28         unsafeUseAsCStringLen,  -- :: ByteString -> (CStringLen -> IO a) -> IO a
   29 
   30         -- ** Converting CStrings to ByteStrings
   31         unsafePackCString,      -- :: CString -> IO ByteString
   32         unsafePackCStringLen,   -- :: CStringLen -> IO ByteString
   33         unsafePackMallocCString,-- :: CString -> IO ByteString
   34 
   35 #if defined(__GLASGOW_HASKELL__)
   36         unsafePackAddress,          -- :: Addr# -> IO ByteString
   37         unsafePackAddressLen,       -- :: Int -> Addr# -> IO ByteString
   38         unsafePackCStringFinalizer, -- :: Ptr Word8 -> Int -> IO () -> IO ByteString
   39         unsafeFinalize,             -- :: ByteString -> IO ()
   40 #endif
   41 
   42   ) where
   43 
   44 import Data.ByteString.Internal
   45 
   46 import Foreign.ForeignPtr       (newForeignPtr_, newForeignPtr, withForeignPtr)
   47 import Foreign.Ptr              (Ptr, plusPtr, castPtr)
   48 
   49 import Foreign.Storable         (Storable(..))
   50 import Foreign.C.String         (CString, CStringLen)
   51 
   52 #ifndef __NHC__
   53 import Control.Exception        (assert)
   54 #endif
   55 
   56 import Data.Word                (Word8)
   57 
   58 #if defined(__GLASGOW_HASKELL__)
   59 import qualified Foreign.ForeignPtr as FC (finalizeForeignPtr)
   60 import qualified Foreign.Concurrent as FC (newForeignPtr)
   61 
   62 --import Data.Generics            (Data(..), Typeable(..))
   63 
   64 import GHC.Prim                 (Addr#)
   65 import GHC.Ptr                  (Ptr(..))
   66 #endif
   67 
   68 -- An alternative to Control.Exception (assert) for nhc98
   69 #ifdef __NHC__
   70 #define assert  assertS "__FILE__ : __LINE__"
   71 assertS :: String -> Bool -> a -> a
   72 assertS _ True  = id
   73 assertS s False = error ("assertion failed at "++s)
   74 #endif
   75 
   76 -- -----------------------------------------------------------------------------
   77 --
   78 -- Useful macros, until we have bang patterns
   79 --
   80 
   81 #define STRICT1(f) f a | a `seq` False = undefined
   82 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
   83 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
   84 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
   85 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
   86 
   87 -- ---------------------------------------------------------------------
   88 --
   89 -- Extensions to the basic interface
   90 --
   91 
   92 -- | A variety of 'head' for non-empty ByteStrings. 'unsafeHead' omits the
   93 -- check for the empty case, so there is an obligation on the programmer
   94 -- to provide a proof that the ByteString is non-empty.
   95 unsafeHead :: ByteString -> Word8
   96 unsafeHead (PS x s l) = assert (l > 0) $
   97     inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
   98 {-# INLINE unsafeHead #-}
   99 
  100 -- | A variety of 'tail' for non-empty ByteStrings. 'unsafeTail' omits the
  101 -- check for the empty case. As with 'unsafeHead', the programmer must
  102 -- provide a separate proof that the ByteString is non-empty.
  103 unsafeTail :: ByteString -> ByteString
  104 unsafeTail (PS ps s l) = assert (l > 0) $ PS ps (s+1) (l-1)
  105 {-# INLINE unsafeTail #-}
  106 
  107 -- | Unsafe 'ByteString' index (subscript) operator, starting from 0, returning a 'Word8'
  108 -- This omits the bounds check, which means there is an accompanying
  109 -- obligation on the programmer to ensure the bounds are checked in some
  110 -- other way.
  111 unsafeIndex :: ByteString -> Int -> Word8
  112 unsafeIndex (PS x s l) i = assert (i >= 0 && i < l) $
  113     inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+i)
  114 {-# INLINE unsafeIndex #-}
  115 
  116 -- | A variety of 'take' which omits the checks on @n@ so there is an
  117 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
  118 unsafeTake :: Int -> ByteString -> ByteString
  119 unsafeTake n (PS x s l) = assert (0 <= n && n <= l) $ PS x s n
  120 {-# INLINE unsafeTake #-}
  121 
  122 -- | A variety of 'drop' which omits the checks on @n@ so there is an
  123 -- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
  124 unsafeDrop  :: Int -> ByteString -> ByteString
  125 unsafeDrop n (PS x s l) = assert (0 <= n && n <= l) $ PS x (s+n) (l-n)
  126 {-# INLINE unsafeDrop #-}
  127 
  128 
  129 #if defined(__GLASGOW_HASKELL__)
  130 -- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
  131 -- Addr\# (an arbitrary machine address assumed to point outside the
  132 -- garbage-collected heap) into a @ByteString@. A much faster way to
  133 -- create an Addr\# is with an unboxed string literal, than to pack a
  134 -- boxed string. A unboxed string literal is compiled to a static @char
  135 -- []@ by GHC. Establishing the length of the string requires a call to
  136 -- @strlen(3)@, so the Addr# must point to a null-terminated buffer (as
  137 -- is the case with "string"# literals in GHC). Use 'unsafePackAddressLen'
  138 -- if you know the length of the string statically.
  139 --
  140 -- An example:
  141 --
  142 -- > literalFS = unsafePackAddress "literal"#
  143 --
  144 -- This function is /unsafe/. If you modify the buffer pointed to by the
  145 -- original Addr# this modification will be reflected in the resulting
  146 -- @ByteString@, breaking referential transparency.
  147 --
  148 -- Note this also won't work if you Add# has embedded '\0' characters in
  149 -- the string (strlen will fail).
  150 --
  151 unsafePackAddress :: Addr# -> IO ByteString
  152 unsafePackAddress addr# = do
  153     p <- newForeignPtr_ cstr
  154     l <- c_strlen cstr
  155     return $ PS p 0 (fromIntegral l)
  156   where
  157     cstr = Ptr addr#
  158 {-# INLINE unsafePackAddress #-}
  159 
  160 -- | /O(1)/ 'unsafePackAddressLen' provides constant-time construction of
  161 -- 'ByteStrings' which is ideal for string literals. It packs a
  162 -- null-terminated sequence of bytes into a 'ByteString', given a raw
  163 -- 'Addr\#' to the string, and the length of the string.
  164 --
  165 -- This function is /unsafe/ in two ways:
  166 --
  167 -- * the length argument is assumed to be correct. If the length
  168 -- argument is incorrect, it is possible to overstep the end of the
  169 -- byte array.
  170 --
  171 -- * if the underying Addr# is later modified, this change will be
  172 -- reflected in resulting @ByteString@, breaking referential
  173 -- transparency.
  174 --
  175 -- If in doubt, don't use these functions.
  176 --
  177 unsafePackAddressLen :: Int -> Addr# -> IO ByteString
  178 unsafePackAddressLen len addr# = do
  179     p <- newForeignPtr_ (Ptr addr#)
  180     return $ PS p 0 len
  181 {-# INLINE unsafePackAddressLen #-}
  182 
  183 -- | /O(1)/ Construct a 'ByteString' given a Ptr Word8 to a buffer, a
  184 -- length, and an IO action representing a finalizer. This function is
  185 -- not available on Hugs.
  186 --
  187 -- This function is /unsafe/, it is possible to break referential
  188 -- transparency by modifying the underlying buffer pointed to by the
  189 -- first argument. Any changes to the original buffer will be reflected
  190 -- in the resulting @ByteString@.
  191 --
  192 unsafePackCStringFinalizer :: Ptr Word8 -> Int -> IO () -> IO ByteString
  193 unsafePackCStringFinalizer p l f = do
  194     fp <- FC.newForeignPtr p f
  195     return $ PS fp 0 l
  196 
  197 -- | Explicitly run the finaliser associated with a 'ByteString'.
  198 -- References to this value after finalisation may generate invalid memory
  199 -- references.
  200 --
  201 -- This function is /unsafe/, as there may be other
  202 -- 'ByteStrings' referring to the same underlying pages. If you use
  203 -- this, you need to have a proof of some kind that all 'ByteString's
  204 -- ever generated from the underlying byte array are no longer live.
  205 --
  206 unsafeFinalize :: ByteString -> IO ()
  207 unsafeFinalize (PS p _ _) = FC.finalizeForeignPtr p
  208 
  209 #endif
  210 
  211 ------------------------------------------------------------------------
  212 -- Packing CStrings into ByteStrings
  213 
  214 -- | /O(n)/ Build a @ByteString@ from a @CString@. This value will have /no/
  215 -- finalizer associated to it, and will not be garbage collected by
  216 -- Haskell. The ByteString length is calculated using /strlen(3)/,
  217 -- and thus the complexity is a /O(n)/.
  218 --
  219 -- This function is /unsafe/. If the @CString@ is later modified, this
  220 -- change will be reflected in the resulting @ByteString@, breaking
  221 -- referential transparency.
  222 --
  223 unsafePackCString :: CString -> IO ByteString
  224 unsafePackCString cstr = do
  225     fp <- newForeignPtr_ (castPtr cstr)
  226     l <- c_strlen cstr
  227     return $! PS fp 0 (fromIntegral l)
  228 
  229 -- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will
  230 -- have /no/ finalizer associated with it, and will not be garbage
  231 -- collected by Haskell. This operation has /O(1)/ complexity as we
  232 -- already know the final size, so no /strlen(3)/ is required.
  233 --
  234 -- This funtion is /unsafe/. If the original @CStringLen@ is later
  235 -- modified, this change will be reflected in the resulting @ByteString@,
  236 -- breaking referential transparency.
  237 --
  238 unsafePackCStringLen :: CStringLen -> IO ByteString
  239 unsafePackCStringLen (ptr,len) = do
  240     fp <- newForeignPtr_ (castPtr ptr)
  241     return $! PS fp 0 (fromIntegral len)
  242 
  243 -- | /O(n)/ Build a @ByteString@ from a malloced @CString@. This value will
  244 -- have a @free(3)@ finalizer associated to it.
  245 --
  246 -- This funtion is /unsafe/. If the original @CString@ is later
  247 -- modified, this change will be reflected in the resulting @ByteString@,
  248 -- breaking referential transparency.
  249 --
  250 -- This function is also unsafe if you call its finalizer twice,
  251 -- which will result in a /double free/ error, or if you pass it
  252 -- a CString not allocated with 'malloc'.
  253 --
  254 unsafePackMallocCString :: CString -> IO ByteString
  255 unsafePackMallocCString cstr = do
  256     fp <- newForeignPtr c_free_finalizer (castPtr cstr)
  257     len <- c_strlen cstr
  258     return $! PS fp 0 (fromIntegral len)
  259 
  260 -- ---------------------------------------------------------------------
  261 
  262 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a
  263 -- @CString@.
  264 --
  265 -- This function does zero copying, and merely unwraps a @ByteString@ to
  266 -- appear as a @CString@. It is /unsafe/ in two ways:
  267 --
  268 -- * After calling this function the @CString@ shares the underlying
  269 -- byte buffer with the original @ByteString@. Thus modifying the
  270 -- @CString@, either in C, or using poke, will cause the contents of the
  271 -- @ByteString@ to change, breaking referential transparency. Other
  272 -- @ByteStrings@ created by sharing (such as those produced via 'take'
  273 -- or 'drop') will also reflect these changes. Modifying the @CString@
  274 -- will break referential transparency. To avoid this, use
  275 -- @useAsCString@, which makes a copy of the original @ByteString@.
  276 --
  277 -- * @CStrings@ are often passed to functions that require them to be
  278 -- null-terminated. If the original @ByteString@ wasn't null terminated,
  279 -- neither will the @CString@ be. It is the programmers responsibility
  280 -- to guarantee that the @ByteString@ is indeed null terminated. If in
  281 -- doubt, use @useAsCString@.
  282 --
  283 unsafeUseAsCString :: ByteString -> (CString -> IO a) -> IO a
  284 unsafeUseAsCString (PS ps s _) ac = withForeignPtr ps $ \p -> ac (castPtr p `plusPtr` s)
  285 
  286 -- | /O(1) construction/ Use a @ByteString@ with a function requiring a
  287 -- @CStringLen@.
  288 -- 
  289 -- This function does zero copying, and merely unwraps a @ByteString@ to
  290 -- appear as a @CStringLen@. It is /unsafe/:
  291 --
  292 -- * After calling this function the @CStringLen@ shares the underlying
  293 -- byte buffer with the original @ByteString@. Thus modifying the
  294 -- @CStringLen@, either in C, or using poke, will cause the contents of the
  295 -- @ByteString@ to change, breaking referential transparency. Other
  296 -- @ByteStrings@ created by sharing (such as those produced via 'take'
  297 -- or 'drop') will also reflect these changes. Modifying the @CStringLen@
  298 -- will break referential transparency. To avoid this, use
  299 -- @useAsCStringLen@, which makes a copy of the original @ByteString@.
  300 --
  301 unsafeUseAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
  302 unsafeUseAsCStringLen (PS ps s l) f = withForeignPtr ps $ \p -> f (castPtr p `plusPtr` s,l)