1 {-# LANGUAGE CPP #-}
    2 {-# OPTIONS_GHC -XMagicHash -XUnboxedTuples #-}
    3 
    4 -- #prune
    5 
    6 -- |
    7 -- Module      : Data.ByteString
    8 -- Copyright   : (c) The University of Glasgow 2001,
    9 --               (c) David Roundy 2003-2005,
   10 --               (c) Simon Marlow 2005
   11 --               (c) Bjorn Bringert 2006
   12 --               (c) Don Stewart 2005-2008
   13 --
   14 --               Array fusion code:
   15 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
   16 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
   17 --
   18 -- License     : BSD-style
   19 --
   20 -- Maintainer  : dons@cse.unsw.edu.au
   21 -- Stability   : experimental
   22 -- Portability : portable
   23 -- 
   24 -- A time and space-efficient implementation of byte vectors using
   25 -- packed Word8 arrays, suitable for high performance use, both in terms
   26 -- of large data quantities, or high speed requirements. Byte vectors
   27 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
   28 -- and can be passed between C and Haskell with little effort.
   29 --
   30 -- This module is intended to be imported @qualified@, to avoid name
   31 -- clashes with "Prelude" functions.  eg.
   32 --
   33 -- > import qualified Data.ByteString as B
   34 --
   35 -- Original GHC implementation by Bryan O\'Sullivan.
   36 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
   37 -- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
   38 -- Polished and extended by Don Stewart.
   39 --
   40 
   41 module Data.ByteString (
   42 
   43         -- * The @ByteString@ type
   44         ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
   45 
   46         -- * Introducing and eliminating 'ByteString's
   47         empty,                  -- :: ByteString
   48         singleton,              -- :: Word8   -> ByteString
   49         pack,                   -- :: [Word8] -> ByteString
   50         unpack,                 -- :: ByteString -> [Word8]
   51 
   52         -- * Basic interface
   53         cons,                   -- :: Word8 -> ByteString -> ByteString
   54         snoc,                   -- :: ByteString -> Word8 -> ByteString
   55         append,                 -- :: ByteString -> ByteString -> ByteString
   56         head,                   -- :: ByteString -> Word8
   57         uncons,                 -- :: ByteString -> Maybe (Word8, ByteString)
   58         last,                   -- :: ByteString -> Word8
   59         tail,                   -- :: ByteString -> ByteString
   60         init,                   -- :: ByteString -> ByteString
   61         null,                   -- :: ByteString -> Bool
   62         length,                 -- :: ByteString -> Int
   63 
   64         -- * Transforming ByteStrings
   65         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
   66         reverse,                -- :: ByteString -> ByteString
   67         intersperse,            -- :: Word8 -> ByteString -> ByteString
   68         intercalate,            -- :: ByteString -> [ByteString] -> ByteString
   69         transpose,              -- :: [ByteString] -> [ByteString]
   70 
   71         -- * Reducing 'ByteString's (folds)
   72         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   73         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   74         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   75         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   76 
   77         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
   78         foldr',                 -- :: (Word8 -> a -> a) -> a -> ByteString -> a
   79         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   80         foldr1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   81 
   82         -- ** Special folds
   83         concat,                 -- :: [ByteString] -> ByteString
   84         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
   85         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
   86         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
   87         maximum,                -- :: ByteString -> Word8
   88         minimum,                -- :: ByteString -> Word8
   89 
   90         -- * Building ByteStrings
   91         -- ** Scans
   92         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
   93         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
   94         scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
   95         scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
   96 
   97         -- ** Accumulating maps
   98         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
   99         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  100 
  101         -- ** Generating and unfolding ByteStrings
  102         replicate,              -- :: Int -> Word8 -> ByteString
  103         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  104         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  105 
  106         -- * Substrings
  107 
  108         -- ** Breaking strings
  109         take,                   -- :: Int -> ByteString -> ByteString
  110         drop,                   -- :: Int -> ByteString -> ByteString
  111         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  112         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  113         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  114         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  115         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  116         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  117         breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  118         group,                  -- :: ByteString -> [ByteString]
  119         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  120         inits,                  -- :: ByteString -> [ByteString]
  121         tails,                  -- :: ByteString -> [ByteString]
  122 
  123         -- ** Breaking into many substrings
  124         split,                  -- :: Word8 -> ByteString -> [ByteString]
  125         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
  126 
  127         -- * Predicates
  128         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
  129         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
  130         isInfixOf,              -- :: ByteString -> ByteString -> Bool
  131 
  132         -- ** Search for arbitrary substrings
  133         breakSubstring,         -- :: ByteString -> ByteString -> (ByteString,ByteString)
  134         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
  135         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
  136 
  137         -- * Searching ByteStrings
  138 
  139         -- ** Searching by equality
  140         elem,                   -- :: Word8 -> ByteString -> Bool
  141         notElem,                -- :: Word8 -> ByteString -> Bool
  142 
  143         -- ** Searching with a predicate
  144         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
  145         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
  146         partition,              -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  147 
  148         -- * Indexing ByteStrings
  149         index,                  -- :: ByteString -> Int -> Word8
  150         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
  151         elemIndices,            -- :: Word8 -> ByteString -> [Int]
  152         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
  153         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
  154         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
  155         count,                  -- :: Word8 -> ByteString -> Int
  156 
  157         -- * Zipping and unzipping ByteStrings
  158         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
  159         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
  160         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
  161 
  162         -- * Ordered ByteStrings
  163         sort,                   -- :: ByteString -> ByteString
  164 
  165         -- * Low level conversions
  166         -- ** Copying ByteStrings
  167         copy,                   -- :: ByteString -> ByteString
  168 
  169         -- ** Packing 'CString's and pointers
  170         packCString,            -- :: CString -> IO ByteString
  171         packCStringLen,         -- :: CStringLen -> IO ByteString
  172 
  173         -- ** Using ByteStrings as 'CString's
  174         useAsCString,           -- :: ByteString -> (CString    -> IO a) -> IO a
  175         useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
  176 
  177         -- * I\/O with 'ByteString's
  178 
  179         -- ** Standard input and output
  180         getLine,                -- :: IO ByteString
  181         getContents,            -- :: IO ByteString
  182         putStr,                 -- :: ByteString -> IO ()
  183         putStrLn,               -- :: ByteString -> IO ()
  184         interact,               -- :: (ByteString -> ByteString) -> IO ()
  185 
  186         -- ** Files
  187         readFile,               -- :: FilePath -> IO ByteString
  188         writeFile,              -- :: FilePath -> ByteString -> IO ()
  189         appendFile,             -- :: FilePath -> ByteString -> IO ()
  190 
  191         -- ** I\/O with Handles
  192         hGetLine,               -- :: Handle -> IO ByteString
  193         hGetContents,           -- :: Handle -> IO ByteString
  194         hGet,                   -- :: Handle -> Int -> IO ByteString
  195         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
  196         hPut,                   -- :: Handle -> ByteString -> IO ()
  197         hPutStr,                -- :: Handle -> ByteString -> IO ()
  198         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
  199 
  200         breakByte
  201 
  202   ) where
  203 
  204 import qualified Prelude as P
  205 import Prelude hiding           (reverse,head,tail,last,init,null
  206                                 ,length,map,lines,foldl,foldr,unlines
  207                                 ,concat,any,take,drop,splitAt,takeWhile
  208                                 ,dropWhile,span,break,elem,filter,maximum
  209                                 ,minimum,all,concatMap,foldl1,foldr1
  210                                 ,scanl,scanl1,scanr,scanr1
  211                                 ,readFile,writeFile,appendFile,replicate
  212                                 ,getContents,getLine,putStr,putStrLn,interact
  213                                 ,zip,zipWith,unzip,notElem)
  214 
  215 import Data.ByteString.Internal
  216 import Data.ByteString.Unsafe
  217 
  218 import qualified Data.List as List
  219 
  220 import Data.Word                (Word8)
  221 import Data.Maybe               (isJust, listToMaybe)
  222 
  223 -- Control.Exception.bracket not available in yhc or nhc
  224 #ifndef __NHC__
  225 import Control.Exception        (finally, bracket, assert)
  226 import qualified Control.Exception as Exception
  227 #else
  228 import IO                  (bracket, finally)
  229 #endif
  230 import Control.Monad            (when)
  231 
  232 import Foreign.C.String         (CString, CStringLen)
  233 import Foreign.C.Types          (CSize)
  234 import Foreign.ForeignPtr
  235 import Foreign.Marshal.Alloc    (allocaBytes, mallocBytes, reallocBytes, finalizerFree)
  236 import Foreign.Marshal.Array    (allocaArray)
  237 import Foreign.Ptr
  238 import Foreign.Storable         (Storable(..))
  239 
  240 -- hGetBuf and hPutBuf not available in yhc or nhc
  241 import System.IO                (stdin,stdout,hClose,hFileSize
  242                                 ,hGetBuf,hPutBuf,openBinaryFile
  243                                 ,Handle,IOMode(..))
  244 
  245 import Data.Monoid              (Monoid, mempty, mappend, mconcat)
  246 
  247 #if !defined(__GLASGOW_HASKELL__)
  248 import System.IO.Unsafe
  249 import qualified System.Environment
  250 import qualified System.IO      (hGetLine)
  251 #endif
  252 
  253 #if defined(__GLASGOW_HASKELL__)
  254 
  255 import System.IO                (hGetBufNonBlocking)
  256 import System.IO.Error          (isEOFError)
  257 
  258 import GHC.Handle
  259 import GHC.Prim                 (Word#, (+#), writeWord8OffAddr#)
  260 import GHC.Base                 (build)
  261 import GHC.Word hiding (Word8)
  262 import GHC.Ptr                  (Ptr(..))
  263 import GHC.ST                   (ST(..))
  264 import GHC.IOBase
  265 
  266 #endif
  267 
  268 -- An alternative to Control.Exception (assert) for nhc98
  269 #ifdef __NHC__
  270 #define assert  assertS "__FILE__ : __LINE__"
  271 assertS :: String -> Bool -> a -> a
  272 assertS _ True  = id
  273 assertS s False = error ("assertion failed at "++s)
  274 #endif
  275 
  276 -- -----------------------------------------------------------------------------
  277 --
  278 -- Useful macros, until we have bang patterns
  279 --
  280 
  281 #define STRICT1(f) f a | a `seq` False = undefined
  282 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
  283 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
  284 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
  285 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
  286 
  287 -- -----------------------------------------------------------------------------
  288 
  289 instance Eq  ByteString where
  290     (==)    = eq
  291 
  292 instance Ord ByteString where
  293     compare = compareBytes
  294 
  295 instance Monoid ByteString where
  296     mempty  = empty
  297     mappend = append
  298     mconcat = concat
  299 
  300 -- | /O(n)/ Equality on the 'ByteString' type.
  301 eq :: ByteString -> ByteString -> Bool
  302 eq a@(PS p s l) b@(PS p' s' l')
  303     | l /= l'            = False    -- short cut on length
  304     | p == p' && s == s' = True     -- short cut for the same string
  305     | otherwise          = compareBytes a b == EQ
  306 {-# INLINE eq #-}
  307 -- ^ still needed
  308 
  309 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices. 
  310 compareBytes :: ByteString -> ByteString -> Ordering
  311 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
  312     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
  313     | otherwise                         = inlinePerformIO $
  314         withForeignPtr x1 $ \p1 ->
  315         withForeignPtr x2 $ \p2 -> do
  316             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
  317             return $! case i `compare` 0 of
  318                         EQ  -> l1 `compare` l2
  319                         x   -> x
  320 
  321 {-
  322 
  323 -- Pure Haskell version
  324 
  325 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2)
  326 --    | len1 == 0  && len2 == 0                     = EQ  -- short cut for empty strings
  327 --    | fp1 == fp2 && off1 == off2 && len1 == len2  = EQ  -- short cut for the same string
  328     | otherwise                                   = inlinePerformIO $
  329     withForeignPtr fp1 $ \p1 ->
  330         withForeignPtr fp2 $ \p2 ->
  331             cmp (p1 `plusPtr` off1)
  332                 (p2 `plusPtr` off2) 0 len1 len2
  333 
  334 -- XXX todo.
  335 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
  336 cmp p1 p2 n len1 len2
  337       | n == len1 = if n == len2 then return EQ else return LT
  338       | n == len2 = return GT
  339       | otherwise = do
  340           a <- peekByteOff p1 n :: IO Word8
  341           b <- peekByteOff p2 n
  342           case a `compare` b of
  343                 EQ -> cmp p1 p2 (n+1) len1 len2
  344                 LT -> return LT
  345                 GT -> return GT
  346 -}
  347 
  348 -- -----------------------------------------------------------------------------
  349 -- Introducing and eliminating 'ByteString's
  350 
  351 -- | /O(1)/ The empty 'ByteString'
  352 empty :: ByteString
  353 empty = PS nullForeignPtr 0 0
  354 
  355 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
  356 singleton :: Word8 -> ByteString
  357 singleton c = unsafeCreate 1 $ \p -> poke p c
  358 {-# INLINE [1] singleton #-}
  359 
  360 -- Inline [1] for intercalate rule
  361 
  362 --
  363 -- XXX The use of unsafePerformIO in allocating functions (unsafeCreate) is critical!
  364 --
  365 -- Otherwise:
  366 --
  367 --  singleton 255 `compare` singleton 127
  368 --
  369 -- is compiled to:
  370 --
  371 --  case mallocByteString 2 of 
  372 --      ForeignPtr f internals -> 
  373 --           case writeWord8OffAddr# f 0 255 of _ -> 
  374 --           case writeWord8OffAddr# f 0 127 of _ ->
  375 --           case eqAddr# f f of 
  376 --                  False -> case compare (GHC.Prim.plusAddr# f 0) 
  377 --                                        (GHC.Prim.plusAddr# f 0)
  378 --
  379 --
  380 
  381 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. 
  382 --
  383 -- For applications with large numbers of string literals, pack can be a
  384 -- bottleneck. In such cases, consider using packAddress (GHC only).
  385 pack :: [Word8] -> ByteString
  386 
  387 #if !defined(__GLASGOW_HASKELL__)
  388 
  389 pack str = unsafeCreate (P.length str) $ \p -> go p str
  390     where
  391         go _ []     = return ()
  392         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
  393 
  394 #else /* hack away */
  395 
  396 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
  397     where
  398         go _ _ []        = return ()
  399         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
  400 
  401         writeByte p i c = ST $ \s# ->
  402             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
  403 
  404 #endif
  405 
  406 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
  407 unpack :: ByteString -> [Word8]
  408 
  409 #if !defined(__GLASGOW_HASKELL__)
  410 
  411 unpack (PS _  _ 0) = []
  412 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
  413         go (p `plusPtr` s) (l - 1) []
  414     where
  415         STRICT3(go)
  416         go p 0 acc = peek p          >>= \e -> return (e : acc)
  417         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
  418 {-# INLINE unpack #-}
  419 
  420 #else
  421 
  422 unpack ps = build (unpackFoldr ps)
  423 {-# INLINE unpack #-}
  424 
  425 --
  426 -- Have unpack fuse with good list consumers
  427 --
  428 -- critical this isn't strict in the acc
  429 -- as it will break in the presence of list fusion. this is a known
  430 -- issue with seq and build/foldr rewrite rules, which rely on lazy
  431 -- demanding to avoid bottoms in the list.
  432 --
  433 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
  434 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
  435     let loop q n    _   | q `seq` n `seq` False = undefined -- n.b.
  436         loop _ (-1) acc = return acc
  437         loop q n    acc = do
  438            a <- peekByteOff q n
  439            loop q (n-1) (a `f` acc)
  440     loop (p `plusPtr` off) (len-1) ch
  441 {-# INLINE [0] unpackFoldr #-}
  442 
  443 unpackList :: ByteString -> [Word8]
  444 unpackList (PS fp off len) = withPtr fp $ \p -> do
  445     let STRICT3(loop)                            
  446         loop _ (-1) acc = return acc
  447         loop q n acc = do
  448            a <- peekByteOff q n
  449            loop q (n-1) (a : acc)
  450     loop (p `plusPtr` off) (len-1) []
  451 
  452 {-# RULES
  453 "ByteString unpack-list" [1]  forall p  .
  454     unpackFoldr p (:) [] = unpackList p
  455  #-}
  456 
  457 #endif
  458 
  459 -- ---------------------------------------------------------------------
  460 -- Basic interface
  461 
  462 -- | /O(1)/ Test whether a ByteString is empty.
  463 null :: ByteString -> Bool
  464 null (PS _ _ l) = assert (l >= 0) $ l <= 0
  465 {-# INLINE null #-}
  466 
  467 -- ---------------------------------------------------------------------
  468 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
  469 length :: ByteString -> Int
  470 length (PS _ _ l) = assert (l >= 0) $ l
  471 {-# INLINE length #-}
  472 
  473 ------------------------------------------------------------------------
  474 
  475 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
  476 -- complexity, as it requires a memcpy.
  477 cons :: Word8 -> ByteString -> ByteString
  478 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
  479         poke p c
  480         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
  481 {-# INLINE cons #-}
  482 
  483 -- | /O(n)/ Append a byte to the end of a 'ByteString'
  484 snoc :: ByteString -> Word8 -> ByteString
  485 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
  486         memcpy p (f `plusPtr` s) (fromIntegral l)
  487         poke (p `plusPtr` l) c
  488 {-# INLINE snoc #-}
  489 
  490 -- todo fuse
  491 
  492 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
  493 -- An exception will be thrown in the case of an empty ByteString.
  494 head :: ByteString -> Word8
  495 head (PS x s l)
  496     | l <= 0    = errorEmptyList "head"
  497     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
  498 {-# INLINE head #-}
  499 
  500 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
  501 -- An exception will be thrown in the case of an empty ByteString.
  502 tail :: ByteString -> ByteString
  503 tail (PS p s l)
  504     | l <= 0    = errorEmptyList "tail"
  505     | otherwise = PS p (s+1) (l-1)
  506 {-# INLINE tail #-}
  507 
  508 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
  509 -- if it is empty.
  510 uncons :: ByteString -> Maybe (Word8, ByteString)
  511 uncons (PS x s l)
  512     | l <= 0    = Nothing
  513     | otherwise = Just (inlinePerformIO $ withForeignPtr x
  514                                         $ \p -> peekByteOff p s,
  515                         PS x (s+1) (l-1))
  516 {-# INLINE uncons #-}
  517 
  518 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
  519 -- An exception will be thrown in the case of an empty ByteString.
  520 last :: ByteString -> Word8
  521 last ps@(PS x s l)
  522     | null ps   = errorEmptyList "last"
  523     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
  524 {-# INLINE last #-}
  525 
  526 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
  527 -- An exception will be thrown in the case of an empty ByteString.
  528 init :: ByteString -> ByteString
  529 init ps@(PS p s l)
  530     | null ps   = errorEmptyList "init"
  531     | otherwise = PS p s (l-1)
  532 {-# INLINE init #-}
  533 
  534 -- | /O(n)/ Append two ByteStrings
  535 append :: ByteString -> ByteString -> ByteString
  536 append xs ys | null xs   = ys
  537              | null ys   = xs
  538              | otherwise = concat [xs,ys]
  539 {-# INLINE append #-}
  540 
  541 -- ---------------------------------------------------------------------
  542 -- Transformations
  543 
  544 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
  545 -- element of @xs@. This function is subject to array fusion.
  546 map :: (Word8 -> Word8) -> ByteString -> ByteString
  547 map f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  548     create len $ map_ 0 (a `plusPtr` s)
  549   where
  550     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
  551     STRICT3(map_)                            
  552     map_ n p1 p2
  553        | n >= len = return ()
  554        | otherwise = do
  555             x <- peekByteOff p1 n
  556             pokeByteOff p2 n (f x)
  557             map_ (n+1) p1 p2
  558 {-# INLINE map #-}
  559 
  560 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
  561 reverse :: ByteString -> ByteString
  562 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
  563         c_reverse p (f `plusPtr` s) (fromIntegral l)
  564 
  565 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
  566 -- 'ByteString' and \`intersperses\' that byte between the elements of
  567 -- the 'ByteString'.  It is analogous to the intersperse function on
  568 -- Lists.
  569 intersperse :: Word8 -> ByteString -> ByteString
  570 intersperse c ps@(PS x s l)
  571     | length ps < 2  = ps
  572     | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
  573         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
  574 
  575 -- | The 'transpose' function transposes the rows and columns of its
  576 -- 'ByteString' argument.
  577 transpose :: [ByteString] -> [ByteString]
  578 transpose ps = P.map pack (List.transpose (P.map unpack ps))
  579 
  580 -- ---------------------------------------------------------------------
  581 -- Reducing 'ByteString's
  582 
  583 -- | 'foldl', applied to a binary operator, a starting value (typically
  584 -- the left-identity of the operator), and a ByteString, reduces the
  585 -- ByteString using the binary operator, from left to right.
  586 --
  587 -- This function is subject to array fusion.
  588 --
  589 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
  590 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  591         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  592     where
  593         STRICT3(lgo)                            
  594         lgo z p q | p == q    = return z
  595                   | otherwise = do c <- peek p
  596                                    lgo (f z c) (p `plusPtr` 1) q
  597 {-# INLINE foldl #-}
  598 
  599 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
  600 -- However, for ByteStrings, all left folds are strict in the accumulator.
  601 --
  602 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
  603 foldl' = foldl
  604 {-# INLINE foldl' #-}
  605 
  606 -- | 'foldr', applied to a binary operator, a starting value
  607 -- (typically the right-identity of the operator), and a ByteString,
  608 -- reduces the ByteString using the binary operator, from right to left.
  609 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
  610 foldr k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  611         go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
  612     where
  613         STRICT3(go)                            
  614         go z p q | p == q    = return z
  615                  | otherwise = do c  <- peek p
  616                                   go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
  617 {-# INLINE foldr #-}
  618 
  619 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
  620 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
  621 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  622         go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
  623     where
  624         STRICT3(go)                            
  625         go z p q | p == q    = return z
  626                  | otherwise = do c  <- peek p
  627                                   go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
  628 {-# INLINE foldr' #-}
  629 
  630 -- | 'foldl1' is a variant of 'foldl' that has no starting value
  631 -- argument, and thus must be applied to non-empty 'ByteStrings'.
  632 -- This function is subject to array fusion. 
  633 -- An exception will be thrown in the case of an empty ByteString.
  634 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  635 foldl1 f ps
  636     | null ps   = errorEmptyList "foldl1"
  637     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
  638 {-# INLINE foldl1 #-}
  639 
  640 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
  641 -- An exception will be thrown in the case of an empty ByteString.
  642 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  643 foldl1' f ps
  644     | null ps   = errorEmptyList "foldl1'"
  645     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
  646 {-# INLINE foldl1' #-}
  647 
  648 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
  649 -- and thus must be applied to non-empty 'ByteString's
  650 -- An exception will be thrown in the case of an empty ByteString.
  651 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  652 foldr1 f ps
  653     | null ps        = errorEmptyList "foldr1"
  654     | otherwise      = foldr f (last ps) (init ps)
  655 {-# INLINE foldr1 #-}
  656 
  657 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
  658 -- accumulator.
  659 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  660 foldr1' f ps
  661     | null ps        = errorEmptyList "foldr1"
  662     | otherwise      = foldr' f (last ps) (init ps)
  663 {-# INLINE foldr1' #-}
  664 
  665 -- ---------------------------------------------------------------------
  666 -- Special folds
  667 
  668 -- | /O(n)/ Concatenate a list of ByteStrings.
  669 concat :: [ByteString] -> ByteString
  670 concat []     = empty
  671 concat [ps]   = ps
  672 concat xs     = unsafeCreate len $ \ptr -> go xs ptr
  673   where len = P.sum . P.map length $ xs
  674         STRICT2(go)                  
  675         go []            _   = return ()
  676         go (PS p s l:ps) ptr = do
  677                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
  678                 go ps (ptr `plusPtr` l)
  679 
  680 -- | Map a function over a 'ByteString' and concatenate the results
  681 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
  682 concatMap f = concat . foldr ((:) . f) []
  683 
  684 -- foldr (append . f) empty
  685 
  686 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
  687 -- any element of the 'ByteString' satisfies the predicate.
  688 any :: (Word8 -> Bool) -> ByteString -> Bool
  689 any _ (PS _ _ 0) = False
  690 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  691         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  692     where
  693         STRICT2(go)                  
  694         go p q | p == q    = return False
  695                | otherwise = do c <- peek p
  696                                 if f c then return True
  697                                        else go (p `plusPtr` 1) q
  698 {-# INLINE any #-}
  699 
  700 -- todo fuse
  701 
  702 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
  703 -- if all elements of the 'ByteString' satisfy the predicate.
  704 all :: (Word8 -> Bool) -> ByteString -> Bool
  705 all _ (PS _ _ 0) = True
  706 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  707         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  708     where
  709         STRICT2(go)                  
  710         go p q | p == q     = return True  -- end of list
  711                | otherwise  = do c <- peek p
  712                                  if f c
  713                                     then go (p `plusPtr` 1) q
  714                                     else return False
  715 {-# INLINE all #-}
  716 
  717 ------------------------------------------------------------------------
  718 
  719 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
  720 -- This function will fuse.
  721 -- An exception will be thrown in the case of an empty ByteString.
  722 maximum :: ByteString -> Word8
  723 maximum xs@(PS x s l)
  724     | null xs   = errorEmptyList "maximum"
  725     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
  726                       c_maximum (p `plusPtr` s) (fromIntegral l)
  727 {-# INLINE maximum #-}
  728 
  729 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
  730 -- This function will fuse.
  731 -- An exception will be thrown in the case of an empty ByteString.
  732 minimum :: ByteString -> Word8
  733 minimum xs@(PS x s l)
  734     | null xs   = errorEmptyList "minimum"
  735     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
  736                       c_minimum (p `plusPtr` s) (fromIntegral l)
  737 {-# INLINE minimum #-}
  738 
  739 ------------------------------------------------------------------------
  740 
  741 -- | The 'mapAccumL' function behaves like a combination of 'map' and
  742 -- 'foldl'; it applies a function to each element of a ByteString,
  743 -- passing an accumulating parameter from left to right, and returning a
  744 -- final value of this accumulator together with the new list.
  745 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  746 mapAccumL f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
  747     gp   <- mallocByteString len
  748     acc' <- withForeignPtr gp $ \p -> mapAccumL_ acc 0 (a `plusPtr` o) p
  749     return $! (acc', PS gp 0 len)
  750   where
  751     STRICT4(mapAccumL_)
  752     mapAccumL_ s n p1 p2
  753        | n >= len = return s
  754        | otherwise = do
  755             x <- peekByteOff p1 n
  756             let (s', y) = f s x
  757             pokeByteOff p2 n y
  758             mapAccumL_ s' (n+1) p1 p2
  759 {-# INLINE mapAccumL #-}
  760 
  761 -- | The 'mapAccumR' function behaves like a combination of 'map' and
  762 -- 'foldr'; it applies a function to each element of a ByteString,
  763 -- passing an accumulating parameter from right to left, and returning a
  764 -- final value of this accumulator together with the new ByteString.
  765 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  766 mapAccumR f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
  767     gp   <- mallocByteString len
  768     acc' <- withForeignPtr gp $ \p -> mapAccumR_ acc (len-1) (a `plusPtr` o) p
  769     return $! (acc', PS gp 0 len)
  770   where
  771     STRICT4(mapAccumR_)
  772     mapAccumR_ s n p q
  773        | n <  0    = return s
  774        | otherwise = do
  775             x  <- peekByteOff p n
  776             let (s', y) = f s x
  777             pokeByteOff q n y
  778             mapAccumR_ s' (n-1) p q
  779 {-# INLINE mapAccumR #-}
  780 
  781 -- ---------------------------------------------------------------------
  782 -- Building ByteStrings
  783 
  784 -- | 'scanl' is similar to 'foldl', but returns a list of successive
  785 -- reduced values from the left. This function will fuse.
  786 --
  787 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
  788 --
  789 -- Note that
  790 --
  791 -- > last (scanl f z xs) == foldl f z xs.
  792 --
  793 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  794 
  795 scanl f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  796     create (len+1) $ \q -> do
  797         poke q v
  798         scanl_ v 0 (a `plusPtr` s) (q `plusPtr` 1)
  799   where
  800     STRICT4(scanl_)
  801     scanl_ z n p q
  802         | n >= len  = return ()
  803         | otherwise = do
  804             x <- peekByteOff p n
  805             let z' = f z x
  806             pokeByteOff q n z'
  807             scanl_ z' (n+1) p q
  808 {-# INLINE scanl #-}
  809 
  810     -- n.b. haskell's List scan returns a list one bigger than the
  811     -- input, so we need to snoc here to get some extra space, however,
  812     -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
  813 
  814 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
  815 -- This function will fuse.
  816 --
  817 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
  818 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  819 scanl1 f ps
  820     | null ps   = empty
  821     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
  822 {-# INLINE scanl1 #-}
  823 
  824 -- | scanr is the right-to-left dual of scanl.
  825 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  826 scanr f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  827     create (len+1) $ \q -> do
  828         poke (q `plusPtr` len) v
  829         scanr_ v (len-1) (a `plusPtr` s) q
  830   where
  831     STRICT4(scanr_)
  832     scanr_ z n p q
  833         | n <  0    = return ()
  834         | otherwise = do
  835             x <- peekByteOff p n
  836             let z' = f x z
  837             pokeByteOff q n z'
  838             scanr_ z' (n-1) p q
  839 {-# INLINE scanr #-}
  840 
  841 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
  842 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  843 scanr1 f ps
  844     | null ps   = empty
  845     | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
  846 {-# INLINE scanr1 #-}
  847 
  848 -- ---------------------------------------------------------------------
  849 -- Unfolds and replicates
  850 
  851 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
  852 -- the value of every element. The following holds:
  853 --
  854 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
  855 --
  856 -- This implemenation uses @memset(3)@
  857 replicate :: Int -> Word8 -> ByteString
  858 replicate w c
  859     | w <= 0    = empty
  860     | otherwise = unsafeCreate w $ \ptr ->
  861                       memset ptr c (fromIntegral w) >> return ()
  862 
  863 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr' 
  864 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a 
  865 -- ByteString from a seed value.  The function takes the element and 
  866 -- returns 'Nothing' if it is done producing the ByteString or returns 
  867 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, 
  868 -- and @b@ is the seed value for further production.
  869 --
  870 -- Examples:
  871 --
  872 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
  873 -- > == pack [0, 1, 2, 3, 4, 5]
  874 --
  875 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
  876 unfoldr f = concat . unfoldChunk 32 64
  877   where unfoldChunk n n' x =
  878           case unfoldrN n f x of
  879             (s, Nothing) -> s : []
  880             (s, Just x') -> s : unfoldChunk n' (n+n') x'
  881 {-# INLINE unfoldr #-}
  882 
  883 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
  884 -- value.  However, the length of the result is limited by the first
  885 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
  886 -- when the maximum length of the result is known.
  887 --
  888 -- The following equation relates 'unfoldrN' and 'unfoldr':
  889 --
  890 -- > unfoldrN n f s == take n (unfoldr f s)
  891 --
  892 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  893 unfoldrN i f x0
  894     | i < 0     = (empty, Just x0)
  895     | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
  896   where STRICT3(go)                            
  897         go p x n =
  898           case f x of
  899             Nothing      -> return (0, n, Nothing)
  900             Just (w,x')
  901              | n == i    -> return (0, n, Just x)
  902              | otherwise -> do poke p w
  903                                go (p `plusPtr` 1) x' (n+1)
  904 {-# INLINE unfoldrN #-}
  905 
  906 -- ---------------------------------------------------------------------
  907 -- Substrings
  908 
  909 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
  910 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
  911 take :: Int -> ByteString -> ByteString
  912 take n ps@(PS x s l)
  913     | n <= 0    = empty
  914     | n >= l    = ps
  915     | otherwise = PS x s n
  916 {-# INLINE take #-}
  917 
  918 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
  919 -- elements, or @[]@ if @n > 'length' xs@.
  920 drop  :: Int -> ByteString -> ByteString
  921 drop n ps@(PS x s l)
  922     | n <= 0    = ps
  923     | n >= l    = empty
  924     | otherwise = PS x (s+n) (l-n)
  925 {-# INLINE drop #-}
  926 
  927 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
  928 splitAt :: Int -> ByteString -> (ByteString, ByteString)
  929 splitAt n ps@(PS x s l)
  930     | n <= 0    = (empty, ps)
  931     | n >= l    = (ps, empty)
  932     | otherwise = (PS x s n, PS x (s+n) (l-n))
  933 {-# INLINE splitAt #-}
  934 
  935 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
  936 -- returns the longest prefix (possibly empty) of @xs@ of elements that
  937 -- satisfy @p@.
  938 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  939 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
  940 {-# INLINE takeWhile #-}
  941 
  942 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
  943 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  944 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
  945 {-# INLINE dropWhile #-}
  946 
  947 -- instead of findIndexOrEnd, we could use memchr here.
  948 
  949 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
  950 --
  951 -- Under GHC, a rewrite rule will transform break (==) into a
  952 -- call to the specialised breakByte:
  953 --
  954 -- > break ((==) x) = breakByte x
  955 -- > break (==x) = breakByte x
  956 --
  957 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  958 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
  959 #if __GLASGOW_HASKELL__ 
  960 {-# INLINE [1] break #-}
  961 #endif
  962 
  963 {-# RULES
  964 "ByteString specialise break (x==)" forall x.
  965     break ((==) x) = breakByte x
  966 "ByteString specialise break (==x)" forall x.
  967     break (==x) = breakByte x
  968   #-}
  969 
  970 -- INTERNAL:
  971 
  972 -- | 'breakByte' breaks its ByteString argument at the first occurence
  973 -- of the specified byte. It is more efficient than 'break' as it is
  974 -- implemented with @memchr(3)@. I.e.
  975 -- 
  976 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
  977 --
  978 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
  979 breakByte c p = case elemIndex c p of
  980     Nothing -> (p,empty)
  981     Just n  -> (unsafeTake n p, unsafeDrop n p)
  982 {-# INLINE breakByte #-}
  983 
  984 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
  985 -- 
  986 -- breakEnd p == spanEnd (not.p)
  987 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  988 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
  989 
  990 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
  991 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
  992 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  993 span p ps = break (not . p) ps
  994 #if __GLASGOW_HASKELL__
  995 {-# INLINE [1] span #-}
  996 #endif
  997 
  998 -- | 'spanByte' breaks its ByteString argument at the first
  999 -- occurence of a byte other than its argument. It is more efficient
 1000 -- than 'span (==)'
 1001 --
 1002 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 1003 --
 1004 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 1005 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 1006     go (p `plusPtr` s) 0
 1007   where
 1008     STRICT2(go)                  
 1009     go p i | i >= l    = return (ps, empty)
 1010            | otherwise = do c' <- peekByteOff p i
 1011                             if c /= c'
 1012                                 then return (unsafeTake i ps, unsafeDrop i ps)
 1013                                 else go p (i+1)
 1014 {-# INLINE spanByte #-}
 1015 
 1016 {-# RULES
 1017 "ByteString specialise span (x==)" forall x.
 1018     span ((==) x) = spanByte x
 1019 "ByteString specialise span (==x)" forall x.
 1020     span (==x) = spanByte x
 1021   #-}
 1022 
 1023 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 1024 -- We have
 1025 --
 1026 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 1027 --
 1028 -- and
 1029 --
 1030 -- > spanEnd (not . isSpace) ps
 1031 -- >    == 
 1032 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 
 1033 --
 1034 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 1035 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 1036 
 1037 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 1038 -- separators, where the predicate returns True for a separator element.
 1039 -- The resulting components do not contain the separators.  Two adjacent
 1040 -- separators result in an empty component in the output.  eg.
 1041 --
 1042 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 1043 -- > splitWith (=='a') []        == []
 1044 --
 1045 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 1046 
 1047 #if defined(__GLASGOW_HASKELL__)
 1048 splitWith _pred (PS _  _   0) = []
 1049 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
 1050   where pred# c# = pred_ (W8# c#)
 1051 
 1052         STRICT4(splitWith0)
 1053         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
 1054             splitLoop pred' p 0 off' len' fp'
 1055 
 1056         splitLoop :: (Word# -> Bool)
 1057                   -> Ptr Word8
 1058                   -> Int -> Int -> Int
 1059                   -> ForeignPtr Word8
 1060                   -> IO [ByteString]
 1061 
 1062         splitLoop pred' p idx' off' len' fp'
 1063             | idx' >= len'  = return [PS fp' off' idx']
 1064             | otherwise = do
 1065                 w <- peekElemOff p (off'+idx')
 1066                 if pred' (case w of W8# w# -> w#)
 1067                    then return (PS fp' off' idx' :
 1068                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
 1069                    else splitLoop pred' p (idx'+1) off' len' fp'
 1070 {-# INLINE splitWith #-}
 1071 
 1072 #else
 1073 splitWith _ (PS _ _ 0) = []
 1074 splitWith p ps = loop p ps
 1075     where
 1076         STRICT2(loop)
 1077         loop q qs = if null rest then [chunk]
 1078                                  else chunk : loop q (unsafeTail rest)
 1079             where (chunk,rest) = break q qs
 1080 #endif
 1081 
 1082 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 1083 -- argument, consuming the delimiter. I.e.
 1084 --
 1085 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 1086 -- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
 1087 -- > split 'x'  "x"          == ["",""]
 1088 -- 
 1089 -- and
 1090 --
 1091 -- > intercalate [c] . split c == id
 1092 -- > split == splitWith . (==)
 1093 -- 
 1094 -- As for all splitting functions in this library, this function does
 1095 -- not copy the substrings, it just constructs new 'ByteStrings' that
 1096 -- are slices of the original.
 1097 --
 1098 split :: Word8 -> ByteString -> [ByteString]
 1099 split _ (PS _ _ 0) = []
 1100 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 1101     let ptr = p `plusPtr` s
 1102 
 1103         STRICT1(loop)        
 1104         loop n =
 1105             let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
 1106                                            w (fromIntegral (l-n))
 1107             in if q == nullPtr
 1108                 then [PS x (s+n) (l-n)]
 1109                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
 1110 
 1111     return (loop 0)
 1112 {-# INLINE split #-}
 1113 
 1114 {-
 1115 -- slower. but stays inside Haskell.
 1116 split _ (PS _  _   0) = []
 1117 split (W8# w#) (PS fp off len) = splitWith' off len fp
 1118     where
 1119         splitWith' off' len' fp' = withPtr fp $ \p ->
 1120             splitLoop p 0 off' len' fp'
 1121 
 1122         splitLoop :: Ptr Word8
 1123                   -> Int -> Int -> Int
 1124                   -> ForeignPtr Word8
 1125                   -> IO [ByteString]
 1126 
 1127         STRICT5(splitLoop)
 1128         splitLoop p idx' off' len' fp'
 1129             | idx' >= len'  = return [PS fp' off' idx']
 1130             | otherwise = do
 1131                 (W8# x#) <- peekElemOff p (off'+idx')
 1132                 if word2Int# w# ==# word2Int# x#
 1133                    then return (PS fp' off' idx' :
 1134                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
 1135                    else splitLoop p (idx'+1) off' len' fp'
 1136 -}
 1137 
 1138 {-
 1139 -- | Like 'splitWith', except that sequences of adjacent separators are
 1140 -- treated as a single separator. eg.
 1141 -- 
 1142 -- > tokens (=='a') "aabbaca" == ["bb","c"]
 1143 --
 1144 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
 1145 tokens f = P.filter (not.null) . splitWith f
 1146 {-# INLINE tokens #-}
 1147 -}
 1148 
 1149 -- | The 'group' function takes a ByteString and returns a list of
 1150 -- ByteStrings such that the concatenation of the result is equal to the
 1151 -- argument.  Moreover, each sublist in the result contains only equal
 1152 -- elements.  For example,
 1153 --
 1154 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
 1155 --
 1156 -- It is a special case of 'groupBy', which allows the programmer to
 1157 -- supply their own equality test. It is about 40% faster than 
 1158 -- /groupBy (==)/
 1159 group :: ByteString -> [ByteString]
 1160 group xs
 1161     | null xs   = []
 1162     | otherwise = ys : group zs
 1163     where
 1164         (ys, zs) = spanByte (unsafeHead xs) xs
 1165 
 1166 -- | The 'groupBy' function is the non-overloaded version of 'group'.
 1167 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 1168 groupBy k xs
 1169     | null xs   = []
 1170     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
 1171     where
 1172         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
 1173 
 1174 -- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of
 1175 -- 'ByteString's and concatenates the list after interspersing the first
 1176 -- argument between each element of the list.
 1177 intercalate :: ByteString -> [ByteString] -> ByteString
 1178 intercalate s = concat . (List.intersperse s)
 1179 {-# INLINE [1] intercalate #-}
 1180 
 1181 {-# RULES
 1182 "ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 .
 1183     intercalate (singleton c) (s1 : s2 : []) = intercalateWithByte c s1 s2
 1184   #-}
 1185 
 1186 -- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings
 1187 -- with a char. Around 4 times faster than the generalised join.
 1188 --
 1189 intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString
 1190 intercalateWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
 1191     withForeignPtr ffp $ \fp ->
 1192     withForeignPtr fgp $ \gp -> do
 1193         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 1194         poke (ptr `plusPtr` l) c
 1195         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
 1196     where
 1197       len = length f + length g + 1
 1198 {-# INLINE intercalateWithByte #-}
 1199 
 1200 -- ---------------------------------------------------------------------
 1201 -- Indexing ByteStrings
 1202 
 1203 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
 1204 index :: ByteString -> Int -> Word8
 1205 index ps n
 1206     | n < 0          = moduleError "index" ("negative index: " ++ show n)
 1207     | n >= length ps = moduleError "index" ("index too large: " ++ show n
 1208                                          ++ ", length = " ++ show (length ps))
 1209     | otherwise      = ps `unsafeIndex` n
 1210 {-# INLINE index #-}
 1211 
 1212 -- | /O(n)/ The 'elemIndex' function returns the index of the first
 1213 -- element in the given 'ByteString' which is equal to the query
 1214 -- element, or 'Nothing' if there is no such element. 
 1215 -- This implementation uses memchr(3).
 1216 elemIndex :: Word8 -> ByteString -> Maybe Int
 1217 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 1218     let p' = p `plusPtr` s
 1219     q <- memchr p' c (fromIntegral l)
 1220     return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
 1221 {-# INLINE elemIndex #-}
 1222 
 1223 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
 1224 -- element in the given 'ByteString' which is equal to the query
 1225 -- element, or 'Nothing' if there is no such element. The following
 1226 -- holds:
 1227 --
 1228 -- > elemIndexEnd c xs == 
 1229 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
 1230 --
 1231 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
 1232 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 1233     go (p `plusPtr` s) (l-1)
 1234   where
 1235     STRICT2(go)                  
 1236     go p i | i < 0     = return Nothing
 1237            | otherwise = do ch' <- peekByteOff p i
 1238                             if ch == ch'
 1239                                 then return $ Just i
 1240                                 else go p (i-1)
 1241 {-# INLINE elemIndexEnd #-}
 1242 
 1243 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
 1244 -- the indices of all elements equal to the query element, in ascending order.
 1245 -- This implementation uses memchr(3).
 1246 elemIndices :: Word8 -> ByteString -> [Int]
 1247 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 1248     let ptr = p `plusPtr` s
 1249 
 1250         STRICT1(loop)        
 1251         loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
 1252                                                 w (fromIntegral (l - n))
 1253                  in if q == nullPtr
 1254                         then []
 1255                         else let i = q `minusPtr` ptr
 1256                              in i : loop (i+1)
 1257     return $! loop 0
 1258 {-# INLINE elemIndices #-}
 1259 
 1260 {-
 1261 -- much slower
 1262 elemIndices :: Word8 -> ByteString -> [Int]
 1263 elemIndices c ps = loop 0 ps
 1264    where STRICT2(loop)
 1265          loop _ ps' | null ps'            = []
 1266          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
 1267                     | otherwise           = loop (n+1) (unsafeTail ps')
 1268 -}
 1269 
 1270 -- | count returns the number of times its argument appears in the ByteString
 1271 --
 1272 -- > count = length . elemIndices
 1273 --
 1274 -- But more efficiently than using length on the intermediate list.
 1275 count :: Word8 -> ByteString -> Int
 1276 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
 1277     fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
 1278 {-# INLINE count #-}
 1279 
 1280 {-
 1281 --
 1282 -- around 30% slower
 1283 --
 1284 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
 1285      go (p `plusPtr` s) (fromIntegral m) 0
 1286     where
 1287         go :: Ptr Word8 -> CSize -> Int -> IO Int
 1288         STRICT3(go)
 1289         go p l i = do
 1290             q <- memchr p w l
 1291             if q == nullPtr
 1292                 then return i
 1293                 else do let k = fromIntegral $ q `minusPtr` p
 1294                         go (q `plusPtr` 1) (l-k-1) (i+1)
 1295 -}
 1296 
 1297 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
 1298 -- returns the index of the first element in the ByteString
 1299 -- satisfying the predicate.
 1300 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
 1301 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
 1302   where
 1303     STRICT2(go)                  
 1304     go ptr n | n >= l    = return Nothing
 1305              | otherwise = do w <- peek ptr
 1306                               if k w
 1307                                 then return (Just n)
 1308                                 else go (ptr `plusPtr` 1) (n+1)
 1309 {-# INLINE findIndex #-}
 1310 
 1311 -- | The 'findIndices' function extends 'findIndex', by returning the
 1312 -- indices of all elements satisfying the predicate, in ascending order.
 1313 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
 1314 findIndices p ps = loop 0 ps
 1315    where
 1316      STRICT2(loop)                  
 1317      loop n qs | null qs           = []
 1318                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
 1319                | otherwise         =     loop (n+1) (unsafeTail qs)
 1320 
 1321 -- ---------------------------------------------------------------------
 1322 -- Searching ByteStrings
 1323 
 1324 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
 1325 elem :: Word8 -> ByteString -> Bool
 1326 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
 1327 {-# INLINE elem #-}
 1328 
 1329 -- | /O(n)/ 'notElem' is the inverse of 'elem'
 1330 notElem :: Word8 -> ByteString -> Bool
 1331 notElem c ps = not (elem c ps)
 1332 {-# INLINE notElem #-}
 1333 
 1334 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
 1335 -- returns a ByteString containing those characters that satisfy the
 1336 -- predicate. This function is subject to array fusion.
 1337 filter :: (Word8 -> Bool) -> ByteString -> ByteString
 1338 filter k ps@(PS x s l)
 1339     | null ps   = ps
 1340     | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
 1341         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
 1342         return $! t `minusPtr` p -- actual length
 1343     where
 1344         STRICT3(go)                            
 1345         go f t end | f == end  = return t
 1346                    | otherwise = do
 1347                         w <- peek f
 1348                         if k w
 1349                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
 1350                             else             go (f `plusPtr` 1) t               end
 1351 {-# INLINE filter #-}
 1352 
 1353 {-
 1354 --
 1355 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
 1356 -- case of filtering a single byte. It is more efficient to use
 1357 -- /filterByte/ in this case.
 1358 --
 1359 -- > filterByte == filter . (==)
 1360 --
 1361 -- filterByte is around 10x faster, and uses much less space, than its
 1362 -- filter equivalent
 1363 --
 1364 filterByte :: Word8 -> ByteString -> ByteString
 1365 filterByte w ps = replicate (count w ps) w
 1366 {-# INLINE filterByte #-}
 1367 
 1368 {-# RULES
 1369 "ByteString specialise filter (== x)" forall x.
 1370     filter ((==) x) = filterByte x
 1371 "ByteString specialise filter (== x)" forall x.
 1372     filter (== x) = filterByte x
 1373   #-}
 1374 -}
 1375 
 1376 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
 1377 -- and returns the first element in matching the predicate, or 'Nothing'
 1378 -- if there is no such element.
 1379 --
 1380 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
 1381 --
 1382 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 1383 find f p = case findIndex f p of
 1384                     Just n -> Just (p `unsafeIndex` n)
 1385                     _      -> Nothing
 1386 {-# INLINE find #-}
 1387 
 1388 {-
 1389 --
 1390 -- fuseable, but we don't want to walk the whole array.
 1391 -- 
 1392 find k = foldl findEFL Nothing
 1393     where findEFL a@(Just _) _ = a