1 {-# LANGUAGE CPP #-}
    2 {-# OPTIONS_GHC -XMagicHash -XUnboxedTuples #-}
    3 
    4 -- #prune
    5 
    6 -- |
    7 -- Module      : Data.ByteString
    8 -- Copyright   : (c) The University of Glasgow 2001,
    9 --               (c) David Roundy 2003-2005,
   10 --               (c) Simon Marlow 2005
   11 --               (c) Bjorn Bringert 2006
   12 --               (c) Don Stewart 2005-2008
   13 --
   14 --               Array fusion code:
   15 --               (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller
   16 --               (c) 2006      Manuel M T Chakravarty & Roman Leshchinskiy
   17 --
   18 -- License     : BSD-style
   19 --
   20 -- Maintainer  : dons@cse.unsw.edu.au
   21 -- Stability   : experimental
   22 -- Portability : portable
   23 -- 
   24 -- A time and space-efficient implementation of byte vectors using
   25 -- packed Word8 arrays, suitable for high performance use, both in terms
   26 -- of large data quantities, or high speed requirements. Byte vectors
   27 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
   28 -- and can be passed between C and Haskell with little effort.
   29 --
   30 -- This module is intended to be imported @qualified@, to avoid name
   31 -- clashes with "Prelude" functions.  eg.
   32 --
   33 -- > import qualified Data.ByteString as B
   34 --
   35 -- Original GHC implementation by Bryan O\'Sullivan.
   36 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
   37 -- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
   38 -- Polished and extended by Don Stewart.
   39 --
   40 
   41 module Data.ByteString (
   42 
   43         -- * The @ByteString@ type
   44         ByteString,             -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
   45 
   46         -- * Introducing and eliminating 'ByteString's
   47         empty,                  -- :: ByteString
   48         singleton,              -- :: Word8   -> ByteString
   49         pack,                   -- :: [Word8] -> ByteString
   50         unpack,                 -- :: ByteString -> [Word8]
   51 
   52         -- * Basic interface
   53         cons,                   -- :: Word8 -> ByteString -> ByteString
   54         snoc,                   -- :: ByteString -> Word8 -> ByteString
   55         append,                 -- :: ByteString -> ByteString -> ByteString
   56         head,                   -- :: ByteString -> Word8
   57         uncons,                 -- :: ByteString -> Maybe (Word8, ByteString)
   58         last,                   -- :: ByteString -> Word8
   59         tail,                   -- :: ByteString -> ByteString
   60         init,                   -- :: ByteString -> ByteString
   61         null,                   -- :: ByteString -> Bool
   62         length,                 -- :: ByteString -> Int
   63 
   64         -- * Transforming ByteStrings
   65         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
   66         reverse,                -- :: ByteString -> ByteString
   67         intersperse,            -- :: Word8 -> ByteString -> ByteString
   68         intercalate,            -- :: ByteString -> [ByteString] -> ByteString
   69         transpose,              -- :: [ByteString] -> [ByteString]
   70 
   71         -- * Reducing 'ByteString's (folds)
   72         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   73         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   74         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   75         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   76 
   77         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
   78         foldr',                 -- :: (Word8 -> a -> a) -> a -> ByteString -> a
   79         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   80         foldr1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   81 
   82         -- ** Special folds
   83         concat,                 -- :: [ByteString] -> ByteString
   84         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
   85         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
   86         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
   87         maximum,                -- :: ByteString -> Word8
   88         minimum,                -- :: ByteString -> Word8
   89 
   90         -- * Building ByteStrings
   91         -- ** Scans
   92         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
   93         scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
   94         scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
   95         scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
   96 
   97         -- ** Accumulating maps
   98         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
   99         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  100 
  101         -- ** Generating and unfolding ByteStrings
  102         replicate,              -- :: Int -> Word8 -> ByteString
  103         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  104         unfoldrN,               -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  105 
  106         -- * Substrings
  107 
  108         -- ** Breaking strings
  109         take,                   -- :: Int -> ByteString -> ByteString
  110         drop,                   -- :: Int -> ByteString -> ByteString
  111         splitAt,                -- :: Int -> ByteString -> (ByteString, ByteString)
  112         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  113         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  114         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  115         spanEnd,                -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  116         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  117         breakEnd,               -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  118         group,                  -- :: ByteString -> [ByteString]
  119         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  120         inits,                  -- :: ByteString -> [ByteString]
  121         tails,                  -- :: ByteString -> [ByteString]
  122 
  123         -- ** Breaking into many substrings
  124         split,                  -- :: Word8 -> ByteString -> [ByteString]
  125         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
  126 
  127         -- * Predicates
  128         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
  129         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
  130         isInfixOf,              -- :: ByteString -> ByteString -> Bool
  131 
  132         -- ** Search for arbitrary substrings
  133         breakSubstring,         -- :: ByteString -> ByteString -> (ByteString,ByteString)
  134         findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
  135         findSubstrings,         -- :: ByteString -> ByteString -> [Int]
  136 
  137         -- * Searching ByteStrings
  138 
  139         -- ** Searching by equality
  140         elem,                   -- :: Word8 -> ByteString -> Bool
  141         notElem,                -- :: Word8 -> ByteString -> Bool
  142 
  143         -- ** Searching with a predicate
  144         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
  145         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
  146         partition,              -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  147 
  148         -- * Indexing ByteStrings
  149         index,                  -- :: ByteString -> Int -> Word8
  150         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int
  151         elemIndices,            -- :: Word8 -> ByteString -> [Int]
  152         elemIndexEnd,           -- :: Word8 -> ByteString -> Maybe Int
  153         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
  154         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int]
  155         count,                  -- :: Word8 -> ByteString -> Int
  156 
  157         -- * Zipping and unzipping ByteStrings
  158         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
  159         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
  160         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
  161 
  162         -- * Ordered ByteStrings
  163         sort,                   -- :: ByteString -> ByteString
  164 
  165         -- * Low level conversions
  166         -- ** Copying ByteStrings
  167         copy,                   -- :: ByteString -> ByteString
  168 
  169         -- ** Packing 'CString's and pointers
  170         packCString,            -- :: CString -> IO ByteString
  171         packCStringLen,         -- :: CStringLen -> IO ByteString
  172 
  173         -- ** Using ByteStrings as 'CString's
  174         useAsCString,           -- :: ByteString -> (CString    -> IO a) -> IO a
  175         useAsCStringLen,        -- :: ByteString -> (CStringLen -> IO a) -> IO a
  176 
  177         -- * I\/O with 'ByteString's
  178 
  179         -- ** Standard input and output
  180         getLine,                -- :: IO ByteString
  181         getContents,            -- :: IO ByteString
  182         putStr,                 -- :: ByteString -> IO ()
  183         putStrLn,               -- :: ByteString -> IO ()
  184         interact,               -- :: (ByteString -> ByteString) -> IO ()
  185 
  186         -- ** Files
  187         readFile,               -- :: FilePath -> IO ByteString
  188         writeFile,              -- :: FilePath -> ByteString -> IO ()
  189         appendFile,             -- :: FilePath -> ByteString -> IO ()
  190 
  191         -- ** I\/O with Handles
  192         hGetLine,               -- :: Handle -> IO ByteString
  193         hGetContents,           -- :: Handle -> IO ByteString
  194         hGet,                   -- :: Handle -> Int -> IO ByteString
  195         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
  196         hPut,                   -- :: Handle -> ByteString -> IO ()
  197         hPutStr,                -- :: Handle -> ByteString -> IO ()
  198         hPutStrLn,              -- :: Handle -> ByteString -> IO ()
  199 
  200         breakByte
  201 
  202   ) where
  203 
  204 import qualified Prelude as P
  205 import Prelude hiding           (reverse,head,tail,last,init,null
  206                                 ,length,map,lines,foldl,foldr,unlines
  207                                 ,concat,any,take,drop,splitAt,takeWhile
  208                                 ,dropWhile,span,break,elem,filter,maximum
  209                                 ,minimum,all,concatMap,foldl1,foldr1
  210                                 ,scanl,scanl1,scanr,scanr1
  211                                 ,readFile,writeFile,appendFile,replicate
  212                                 ,getContents,getLine,putStr,putStrLn,interact
  213                                 ,zip,zipWith,unzip,notElem)
  214 
  215 import Data.ByteString.Internal
  216 import Data.ByteString.Unsafe
  217 
  218 import qualified Data.List as List
  219 
  220 import Data.Word                (Word8)
  221 import Data.Maybe               (isJust, listToMaybe)
  222 
  223 -- Control.Exception.bracket not available in yhc or nhc
  224 #ifndef __NHC__
  225 import Control.Exception        (finally, bracket, assert)
  226 import qualified Control.Exception as Exception
  227 #else
  228 import IO                  (bracket, finally)
  229 #endif
  230 import Control.Monad            (when)
  231 
  232 import Foreign.C.String         (CString, CStringLen)
  233 import Foreign.C.Types          (CSize)
  234 import Foreign.ForeignPtr
  235 import Foreign.Marshal.Alloc    (allocaBytes, mallocBytes, reallocBytes, finalizerFree)
  236 import Foreign.Marshal.Array    (allocaArray)
  237 import Foreign.Ptr
  238 import Foreign.Storable         (Storable(..))
  239 
  240 -- hGetBuf and hPutBuf not available in yhc or nhc
  241 import System.IO                (stdin,stdout,hClose,hFileSize
  242                                 ,hGetBuf,hPutBuf,openBinaryFile
  243                                 ,Handle,IOMode(..))
  244 
  245 import Data.Monoid              (Monoid, mempty, mappend, mconcat)
  246 
  247 #if !defined(__GLASGOW_HASKELL__)
  248 import System.IO.Unsafe
  249 import qualified System.Environment
  250 import qualified System.IO      (hGetLine)
  251 #endif
  252 
  253 #if defined(__GLASGOW_HASKELL__)
  254 
  255 import System.IO                (hGetBufNonBlocking)
  256 import System.IO.Error          (isEOFError)
  257 
  258 import GHC.Handle
  259 import GHC.Prim                 (Word#, (+#), writeWord8OffAddr#)
  260 import GHC.Base                 (build)
  261 import GHC.Word hiding (Word8)
  262 import GHC.Ptr                  (Ptr(..))
  263 import GHC.ST                   (ST(..))
  264 import GHC.IOBase
  265 
  266 #endif
  267 
  268 -- An alternative to Control.Exception (assert) for nhc98
  269 #ifdef __NHC__
  270 #define assert  assertS "__FILE__ : __LINE__"
  271 assertS :: String -> Bool -> a -> a
  272 assertS _ True  = id
  273 assertS s False = error ("assertion failed at "++s)
  274 #endif
  275 
  276 -- -----------------------------------------------------------------------------
  277 --
  278 -- Useful macros, until we have bang patterns
  279 --
  280 
  281 #define STRICT1(f) f a | a `seq` False = undefined
  282 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
  283 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
  284 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
  285 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
  286 
  287 -- -----------------------------------------------------------------------------
  288 
  289 instance Eq  ByteString where
  290     (==)    = eq
  291 
  292 instance Ord ByteString where
  293     compare = compareBytes
  294 
  295 instance Monoid ByteString where
  296     mempty  = empty
  297     mappend = append
  298     mconcat = concat
  299 
  300 -- | /O(n)/ Equality on the 'ByteString' type.
  301 eq :: ByteString -> ByteString -> Bool
  302 eq a@(PS p s l) b@(PS p' s' l')
  303     | l /= l'            = False    -- short cut on length
  304     | p == p' && s == s' = True     -- short cut for the same string
  305     | otherwise          = compareBytes a b == EQ
  306 {-# INLINE eq #-}
  307 -- ^ still needed
  308 
  309 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices. 
  310 compareBytes :: ByteString -> ByteString -> Ordering
  311 compareBytes (PS x1 s1 l1) (PS x2 s2 l2)
  312     | l1 == 0  && l2 == 0               = EQ  -- short cut for empty strings
  313     | otherwise                         = inlinePerformIO $
  314         withForeignPtr x1 $ \p1 ->
  315         withForeignPtr x2 $ \p2 -> do
  316             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2)
  317             return $! case i `compare` 0 of
  318                         EQ  -> l1 `compare` l2
  319                         x   -> x
  320 
  321 {-
  322 
  323 -- Pure Haskell version
  324 
  325 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2)
  326 --    | len1 == 0  && len2 == 0                     = EQ  -- short cut for empty strings
  327 --    | fp1 == fp2 && off1 == off2 && len1 == len2  = EQ  -- short cut for the same string
  328     | otherwise                                   = inlinePerformIO $
  329     withForeignPtr fp1 $ \p1 ->
  330         withForeignPtr fp2 $ \p2 ->
  331             cmp (p1 `plusPtr` off1)
  332                 (p2 `plusPtr` off2) 0 len1 len2
  333 
  334 -- XXX todo.
  335 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering
  336 cmp p1 p2 n len1 len2
  337       | n == len1 = if n == len2 then return EQ else return LT
  338       | n == len2 = return GT
  339       | otherwise = do
  340           a <- peekByteOff p1 n :: IO Word8
  341           b <- peekByteOff p2 n
  342           case a `compare` b of
  343                 EQ -> cmp p1 p2 (n+1) len1 len2
  344                 LT -> return LT
  345                 GT -> return GT
  346 -}
  347 
  348 -- -----------------------------------------------------------------------------
  349 -- Introducing and eliminating 'ByteString's
  350 
  351 -- | /O(1)/ The empty 'ByteString'
  352 empty :: ByteString
  353 empty = PS nullForeignPtr 0 0
  354 
  355 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
  356 singleton :: Word8 -> ByteString
  357 singleton c = unsafeCreate 1 $ \p -> poke p c
  358 {-# INLINE [1] singleton #-}
  359 
  360 -- Inline [1] for intercalate rule
  361 
  362 --
  363 -- XXX The use of unsafePerformIO in allocating functions (unsafeCreate) is critical!
  364 --
  365 -- Otherwise:
  366 --
  367 --  singleton 255 `compare` singleton 127
  368 --
  369 -- is compiled to:
  370 --
  371 --  case mallocByteString 2 of 
  372 --      ForeignPtr f internals -> 
  373 --           case writeWord8OffAddr# f 0 255 of _ -> 
  374 --           case writeWord8OffAddr# f 0 127 of _ ->
  375 --           case eqAddr# f f of 
  376 --                  False -> case compare (GHC.Prim.plusAddr# f 0) 
  377 --                                        (GHC.Prim.plusAddr# f 0)
  378 --
  379 --
  380 
  381 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. 
  382 --
  383 -- For applications with large numbers of string literals, pack can be a
  384 -- bottleneck. In such cases, consider using packAddress (GHC only).
  385 pack :: [Word8] -> ByteString
  386 
  387 #if !defined(__GLASGOW_HASKELL__)
  388 
  389 pack str = unsafeCreate (P.length str) $ \p -> go p str
  390     where
  391         go _ []     = return ()
  392         go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff
  393 
  394 #else /* hack away */
  395 
  396 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str)
  397     where
  398         go _ _ []        = return ()
  399         go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs
  400 
  401         writeByte p i c = ST $ \s# ->
  402             case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #)
  403 
  404 #endif
  405 
  406 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
  407 unpack :: ByteString -> [Word8]
  408 
  409 #if !defined(__GLASGOW_HASKELL__)
  410 
  411 unpack (PS _  _ 0) = []
  412 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p ->
  413         go (p `plusPtr` s) (l - 1) []
  414     where
  415         STRICT3(go)
  416         go p 0 acc = peek p          >>= \e -> return (e : acc)
  417         go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc)
  418 {-# INLINE unpack #-}
  419 
  420 #else
  421 
  422 unpack ps = build (unpackFoldr ps)
  423 {-# INLINE unpack #-}
  424 
  425 --
  426 -- Have unpack fuse with good list consumers
  427 --
  428 -- critical this isn't strict in the acc
  429 -- as it will break in the presence of list fusion. this is a known
  430 -- issue with seq and build/foldr rewrite rules, which rely on lazy
  431 -- demanding to avoid bottoms in the list.
  432 --
  433 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
  434 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
  435     let loop q n    _   | q `seq` n `seq` False = undefined -- n.b.
  436         loop _ (-1) acc = return acc
  437         loop q n    acc = do
  438            a <- peekByteOff q n
  439            loop q (n-1) (a `f` acc)
  440     loop (p `plusPtr` off) (len-1) ch
  441 {-# INLINE [0] unpackFoldr #-}
  442 
  443 unpackList :: ByteString -> [Word8]
  444 unpackList (PS fp off len) = withPtr fp $ \p -> do
  445     let STRICT3(loop)                            
  446         loop _ (-1) acc = return acc
  447         loop q n acc = do
  448            a <- peekByteOff q n
  449            loop q (n-1) (a : acc)
  450     loop (p `plusPtr` off) (len-1) []
  451 
  452 {-# RULES
  453 "ByteString unpack-list" [1]  forall p  .
  454     unpackFoldr p (:) [] = unpackList p
  455  #-}
  456 
  457 #endif
  458 
  459 -- ---------------------------------------------------------------------
  460 -- Basic interface
  461 
  462 -- | /O(1)/ Test whether a ByteString is empty.
  463 null :: ByteString -> Bool
  464 null (PS _ _ l) = assert (l >= 0) $ l <= 0
  465 {-# INLINE null #-}
  466 
  467 -- ---------------------------------------------------------------------
  468 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
  469 length :: ByteString -> Int
  470 length (PS _ _ l) = assert (l >= 0) $ l
  471 {-# INLINE length #-}
  472 
  473 ------------------------------------------------------------------------
  474 
  475 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
  476 -- complexity, as it requires a memcpy.
  477 cons :: Word8 -> ByteString -> ByteString
  478 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
  479         poke p c
  480         memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
  481 {-# INLINE cons #-}
  482 
  483 -- | /O(n)/ Append a byte to the end of a 'ByteString'
  484 snoc :: ByteString -> Word8 -> ByteString
  485 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
  486         memcpy p (f `plusPtr` s) (fromIntegral l)
  487         poke (p `plusPtr` l) c
  488 {-# INLINE snoc #-}
  489 
  490 -- todo fuse
  491 
  492 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
  493 -- An exception will be thrown in the case of an empty ByteString.
  494 head :: ByteString -> Word8
  495 head (PS x s l)
  496     | l <= 0    = errorEmptyList "head"
  497     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
  498 {-# INLINE head #-}
  499 
  500 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
  501 -- An exception will be thrown in the case of an empty ByteString.
  502 tail :: ByteString -> ByteString
  503 tail (PS p s l)
  504     | l <= 0    = errorEmptyList "tail"
  505     | otherwise = PS p (s+1) (l-1)
  506 {-# INLINE tail #-}
  507 
  508 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
  509 -- if it is empty.
  510 uncons :: ByteString -> Maybe (Word8, ByteString)
  511 uncons (PS x s l)
  512     | l <= 0    = Nothing
  513     | otherwise = Just (inlinePerformIO $ withForeignPtr x
  514                                         $ \p -> peekByteOff p s,
  515                         PS x (s+1) (l-1))
  516 {-# INLINE uncons #-}
  517 
  518 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
  519 -- An exception will be thrown in the case of an empty ByteString.
  520 last :: ByteString -> Word8
  521 last ps@(PS x s l)
  522     | null ps   = errorEmptyList "last"
  523     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
  524 {-# INLINE last #-}
  525 
  526 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
  527 -- An exception will be thrown in the case of an empty ByteString.
  528 init :: ByteString -> ByteString
  529 init ps@(PS p s l)
  530     | null ps   = errorEmptyList "init"
  531     | otherwise = PS p s (l-1)
  532 {-# INLINE init #-}
  533 
  534 -- | /O(n)/ Append two ByteStrings
  535 append :: ByteString -> ByteString -> ByteString
  536 append xs ys | null xs   = ys
  537              | null ys   = xs
  538              | otherwise = concat [xs,ys]
  539 {-# INLINE append #-}
  540 
  541 -- ---------------------------------------------------------------------
  542 -- Transformations
  543 
  544 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
  545 -- element of @xs@. This function is subject to array fusion.
  546 map :: (Word8 -> Word8) -> ByteString -> ByteString
  547 map f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  548     create len $ map_ 0 (a `plusPtr` s)
  549   where
  550     map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
  551     STRICT3(map_)                            
  552     map_ n p1 p2
  553        | n >= len = return ()
  554        | otherwise = do
  555             x <- peekByteOff p1 n
  556             pokeByteOff p2 n (f x)
  557             map_ (n+1) p1 p2
  558 {-# INLINE map #-}
  559 
  560 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
  561 reverse :: ByteString -> ByteString
  562 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
  563         c_reverse p (f `plusPtr` s) (fromIntegral l)
  564 
  565 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
  566 -- 'ByteString' and \`intersperses\' that byte between the elements of
  567 -- the 'ByteString'.  It is analogous to the intersperse function on
  568 -- Lists.
  569 intersperse :: Word8 -> ByteString -> ByteString
  570 intersperse c ps@(PS x s l)
  571     | length ps < 2  = ps
  572     | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
  573         c_intersperse p (f `plusPtr` s) (fromIntegral l) c
  574 
  575 -- | The 'transpose' function transposes the rows and columns of its
  576 -- 'ByteString' argument.
  577 transpose :: [ByteString] -> [ByteString]
  578 transpose ps = P.map pack (List.transpose (P.map unpack ps))
  579 
  580 -- ---------------------------------------------------------------------
  581 -- Reducing 'ByteString's
  582 
  583 -- | 'foldl', applied to a binary operator, a starting value (typically
  584 -- the left-identity of the operator), and a ByteString, reduces the
  585 -- ByteString using the binary operator, from left to right.
  586 --
  587 -- This function is subject to array fusion.
  588 --
  589 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
  590 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  591         lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  592     where
  593         STRICT3(lgo)                            
  594         lgo z p q | p == q    = return z
  595                   | otherwise = do c <- peek p
  596                                    lgo (f z c) (p `plusPtr` 1) q
  597 {-# INLINE foldl #-}
  598 
  599 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
  600 -- However, for ByteStrings, all left folds are strict in the accumulator.
  601 --
  602 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
  603 foldl' = foldl
  604 {-# INLINE foldl' #-}
  605 
  606 -- | 'foldr', applied to a binary operator, a starting value
  607 -- (typically the right-identity of the operator), and a ByteString,
  608 -- reduces the ByteString using the binary operator, from right to left.
  609 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
  610 foldr k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  611         go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
  612     where
  613         STRICT3(go)                            
  614         go z p q | p == q    = return z
  615                  | otherwise = do c  <- peek p
  616                                   go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
  617 {-# INLINE foldr #-}
  618 
  619 -- | 'foldr\'' is like 'foldr', but strict in the accumulator.
  620 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
  621 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  622         go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
  623     where
  624         STRICT3(go)                            
  625         go z p q | p == q    = return z
  626                  | otherwise = do c  <- peek p
  627                                   go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
  628 {-# INLINE foldr' #-}
  629 
  630 -- | 'foldl1' is a variant of 'foldl' that has no starting value
  631 -- argument, and thus must be applied to non-empty 'ByteStrings'.
  632 -- This function is subject to array fusion. 
  633 -- An exception will be thrown in the case of an empty ByteString.
  634 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  635 foldl1 f ps
  636     | null ps   = errorEmptyList "foldl1"
  637     | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
  638 {-# INLINE foldl1 #-}
  639 
  640 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
  641 -- An exception will be thrown in the case of an empty ByteString.
  642 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  643 foldl1' f ps
  644     | null ps   = errorEmptyList "foldl1'"
  645     | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
  646 {-# INLINE foldl1' #-}
  647 
  648 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
  649 -- and thus must be applied to non-empty 'ByteString's
  650 -- An exception will be thrown in the case of an empty ByteString.
  651 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  652 foldr1 f ps
  653     | null ps        = errorEmptyList "foldr1"
  654     | otherwise      = foldr f (last ps) (init ps)
  655 {-# INLINE foldr1 #-}
  656 
  657 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
  658 -- accumulator.
  659 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  660 foldr1' f ps
  661     | null ps        = errorEmptyList "foldr1"
  662     | otherwise      = foldr' f (last ps) (init ps)
  663 {-# INLINE foldr1' #-}
  664 
  665 -- ---------------------------------------------------------------------
  666 -- Special folds
  667 
  668 -- | /O(n)/ Concatenate a list of ByteStrings.
  669 concat :: [ByteString] -> ByteString
  670 concat []     = empty
  671 concat [ps]   = ps
  672 concat xs     = unsafeCreate len $ \ptr -> go xs ptr
  673   where len = P.sum . P.map length $ xs
  674         STRICT2(go)                  
  675         go []            _   = return ()
  676         go (PS p s l:ps) ptr = do
  677                 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l)
  678                 go ps (ptr `plusPtr` l)
  679 
  680 -- | Map a function over a 'ByteString' and concatenate the results
  681 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
  682 concatMap f = concat . foldr ((:) . f) []
  683 
  684 -- foldr (append . f) empty
  685 
  686 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
  687 -- any element of the 'ByteString' satisfies the predicate.
  688 any :: (Word8 -> Bool) -> ByteString -> Bool
  689 any _ (PS _ _ 0) = False
  690 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  691         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  692     where
  693         STRICT2(go)                  
  694         go p q | p == q    = return False
  695                | otherwise = do c <- peek p
  696                                 if f c then return True
  697                                        else go (p `plusPtr` 1) q
  698 {-# INLINE any #-}
  699 
  700 -- todo fuse
  701 
  702 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
  703 -- if all elements of the 'ByteString' satisfy the predicate.
  704 all :: (Word8 -> Bool) -> ByteString -> Bool
  705 all _ (PS _ _ 0) = True
  706 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  707         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  708     where
  709         STRICT2(go)                  
  710         go p q | p == q     = return True  -- end of list
  711                | otherwise  = do c <- peek p
  712                                  if f c
  713                                     then go (p `plusPtr` 1) q
  714                                     else return False
  715 {-# INLINE all #-}
  716 
  717 ------------------------------------------------------------------------
  718 
  719 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
  720 -- This function will fuse.
  721 -- An exception will be thrown in the case of an empty ByteString.
  722 maximum :: ByteString -> Word8
  723 maximum xs@(PS x s l)
  724     | null xs   = errorEmptyList "maximum"
  725     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
  726                       c_maximum (p `plusPtr` s) (fromIntegral l)
  727 {-# INLINE maximum #-}
  728 
  729 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
  730 -- This function will fuse.
  731 -- An exception will be thrown in the case of an empty ByteString.
  732 minimum :: ByteString -> Word8
  733 minimum xs@(PS x s l)
  734     | null xs   = errorEmptyList "minimum"
  735     | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
  736                       c_minimum (p `plusPtr` s) (fromIntegral l)
  737 {-# INLINE minimum #-}
  738 
  739 ------------------------------------------------------------------------
  740 
  741 -- | The 'mapAccumL' function behaves like a combination of 'map' and
  742 -- 'foldl'; it applies a function to each element of a ByteString,
  743 -- passing an accumulating parameter from left to right, and returning a
  744 -- final value of this accumulator together with the new list.
  745 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  746 mapAccumL f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
  747     gp   <- mallocByteString len
  748     acc' <- withForeignPtr gp $ \p -> mapAccumL_ acc 0 (a `plusPtr` o) p
  749     return $! (acc', PS gp 0 len)
  750   where
  751     STRICT4(mapAccumL_)
  752     mapAccumL_ s n p1 p2
  753        | n >= len = return s
  754        | otherwise = do
  755             x <- peekByteOff p1 n
  756             let (s', y) = f s x
  757             pokeByteOff p2 n y
  758             mapAccumL_ s' (n+1) p1 p2
  759 {-# INLINE mapAccumL #-}
  760 
  761 -- | The 'mapAccumR' function behaves like a combination of 'map' and
  762 -- 'foldr'; it applies a function to each element of a ByteString,
  763 -- passing an accumulating parameter from right to left, and returning a
  764 -- final value of this accumulator together with the new ByteString.
  765 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  766 mapAccumR f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
  767     gp   <- mallocByteString len
  768     acc' <- withForeignPtr gp $ \p -> mapAccumR_ acc (len-1) (a `plusPtr` o) p
  769     return $! (acc', PS gp 0 len)
  770   where
  771     STRICT4(mapAccumR_)
  772     mapAccumR_ s n p q
  773        | n <  0    = return s
  774        | otherwise = do
  775             x  <- peekByteOff p n
  776             let (s', y) = f s x
  777             pokeByteOff q n y
  778             mapAccumR_ s' (n-1) p q
  779 {-# INLINE mapAccumR #-}
  780 
  781 -- ---------------------------------------------------------------------
  782 -- Building ByteStrings
  783 
  784 -- | 'scanl' is similar to 'foldl', but returns a list of successive
  785 -- reduced values from the left. This function will fuse.
  786 --
  787 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
  788 --
  789 -- Note that
  790 --
  791 -- > last (scanl f z xs) == foldl f z xs.
  792 --
  793 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  794 
  795 scanl f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  796     create (len+1) $ \q -> do
  797         poke q v
  798         scanl_ v 0 (a `plusPtr` s) (q `plusPtr` 1)
  799   where
  800     STRICT4(scanl_)
  801     scanl_ z n p q
  802         | n >= len  = return ()
  803         | otherwise = do
  804             x <- peekByteOff p n
  805             let z' = f z x
  806             pokeByteOff q n z'
  807             scanl_ z' (n+1) p q
  808 {-# INLINE scanl #-}
  809 
  810     -- n.b. haskell's List scan returns a list one bigger than the
  811     -- input, so we need to snoc here to get some extra space, however,
  812     -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
  813 
  814 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
  815 -- This function will fuse.
  816 --
  817 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
  818 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  819 scanl1 f ps
  820     | null ps   = empty
  821     | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
  822 {-# INLINE scanl1 #-}
  823 
  824 -- | scanr is the right-to-left dual of scanl.
  825 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  826 scanr f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  827     create (len+1) $ \q -> do
  828         poke (q `plusPtr` len) v
  829         scanr_ v (len-1) (a `plusPtr` s) q
  830   where
  831     STRICT4(scanr_)
  832     scanr_ z n p q
  833         | n <  0    = return ()
  834         | otherwise = do
  835             x <- peekByteOff p n
  836             let z' = f x z
  837             pokeByteOff q n z'
  838             scanr_ z' (n-1) p q
  839 {-# INLINE scanr #-}
  840 
  841 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
  842 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  843 scanr1 f ps
  844     | null ps   = empty
  845     | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
  846 {-# INLINE scanr1 #-}
  847 
  848 -- ---------------------------------------------------------------------
  849 -- Unfolds and replicates
  850 
  851 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
  852 -- the value of every element. The following holds:
  853 --
  854 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c
  855 --
  856 -- This implemenation uses @memset(3)@
  857 replicate :: Int -> Word8 -> ByteString
  858 replicate w c
  859     | w <= 0    = empty
  860     | otherwise = unsafeCreate w $ \ptr ->
  861                       memset ptr c (fromIntegral w) >> return ()
  862 
  863 -- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr' 
  864 -- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a 
  865 -- ByteString from a seed value.  The function takes the element and 
  866 -- returns 'Nothing' if it is done producing the ByteString or returns 
  867 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, 
  868 -- and @b@ is the seed value for further production.
  869 --
  870 -- Examples:
  871 --
  872 -- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
  873 -- > == pack [0, 1, 2, 3, 4, 5]
  874 --
  875 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
  876 unfoldr f = concat . unfoldChunk 32 64
  877   where unfoldChunk n n' x =
  878           case unfoldrN n f x of
  879             (s, Nothing) -> s : []
  880             (s, Just x') -> s : unfoldChunk n' (n+n') x'
  881 {-# INLINE unfoldr #-}
  882 
  883 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
  884 -- value.  However, the length of the result is limited by the first
  885 -- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
  886 -- when the maximum length of the result is known.
  887 --
  888 -- The following equation relates 'unfoldrN' and 'unfoldr':
  889 --
  890 -- > unfoldrN n f s == take n (unfoldr f s)
  891 --
  892 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  893 unfoldrN i f x0
  894     | i < 0     = (empty, Just x0)
  895     | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
  896   where STRICT3(go)                            
  897         go p x n =
  898           case f x of
  899             Nothing      -> return (0, n, Nothing)
  900             Just (w,x')
  901              | n == i    -> return (0, n, Just x)
  902              | otherwise -> do poke p w
  903                                go (p `plusPtr` 1) x' (n+1)
  904 {-# INLINE unfoldrN #-}
  905 
  906 -- ---------------------------------------------------------------------
  907 -- Substrings
  908 
  909 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
  910 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
  911 take :: Int -> ByteString -> ByteString
  912 take n ps@(PS x s l)
  913     | n <= 0    = empty
  914     | n >= l    = ps
  915     | otherwise = PS x s n
  916 {-# INLINE take #-}
  917 
  918 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
  919 -- elements, or @[]@ if @n > 'length' xs@.
  920 drop  :: Int -> ByteString -> ByteString
  921 drop n ps@(PS x s l)
  922     | n <= 0    = ps
  923     | n >= l    = empty
  924     | otherwise = PS x (s+n) (l-n)
  925 {-# INLINE drop #-}
  926 
  927 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
  928 splitAt :: Int -> ByteString -> (ByteString, ByteString)
  929 splitAt n ps@(PS x s l)
  930     | n <= 0    = (empty, ps)
  931     | n >= l    = (ps, empty)
  932     | otherwise = (PS x s n, PS x (s+n) (l-n))
  933 {-# INLINE splitAt #-}
  934 
  935 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
  936 -- returns the longest prefix (possibly empty) of @xs@ of elements that
  937 -- satisfy @p@.
  938 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  939 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
  940 {-# INLINE takeWhile #-}
  941 
  942 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
  943 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  944 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
  945 {-# INLINE dropWhile #-}
  946 
  947 -- instead of findIndexOrEnd, we could use memchr here.
  948 
  949 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
  950 --
  951 -- Under GHC, a rewrite rule will transform break (==) into a
  952 -- call to the specialised breakByte:
  953 --
  954 -- > break ((==) x) = breakByte x
  955 -- > break (==x) = breakByte x
  956 --
  957 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  958 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
  959 #if __GLASGOW_HASKELL__ 
  960 {-# INLINE [1] break #-}
  961 #endif
  962 
  963 {-# RULES
  964 "ByteString specialise break (x==)" forall x.
  965     break ((==) x) = breakByte x
  966 "ByteString specialise break (==x)" forall x.
  967     break (==x) = breakByte x
  968   #-}
  969 
  970 -- INTERNAL:
  971 
  972 -- | 'breakByte' breaks its ByteString argument at the first occurence
  973 -- of the specified byte. It is more efficient than 'break' as it is
  974 -- implemented with @memchr(3)@. I.e.
  975 -- 
  976 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
  977 --
  978 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
  979 breakByte c p = case elemIndex c p of
  980     Nothing -> (p,empty)
  981     Just n  -> (unsafeTake n p, unsafeDrop n p)
  982 {-# INLINE breakByte #-}
  983 
  984 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
  985 -- 
  986 -- breakEnd p == spanEnd (not.p)
  987 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  988 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
  989 
  990 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
  991 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
  992 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  993 span p ps = break (not . p) ps
  994 #if __GLASGOW_HASKELL__
  995 {-# INLINE [1] span #-}
  996 #endif
  997 
  998 -- | 'spanByte' breaks its ByteString argument at the first
  999 -- occurence of a byte other than its argument. It is more efficient
 1000 -- than 'span (==)'
 1001 --
 1002 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
 1003 --
 1004 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
 1005 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 1006     go (p `plusPtr` s) 0
 1007   where
 1008     STRICT2(go)                  
 1009     go p i | i >= l    = return (ps, empty)
 1010            | otherwise = do c' <- peekByteOff p i
 1011                             if c /= c'
 1012                                 then return (unsafeTake i ps, unsafeDrop i ps)
 1013                                 else go p (i+1)
 1014 {-# INLINE spanByte #-}
 1015 
 1016 {-# RULES
 1017 "ByteString specialise span (x==)" forall x.
 1018     span ((==) x) = spanByte x
 1019 "ByteString specialise span (==x)" forall x.
 1020     span (==x) = spanByte x
 1021   #-}
 1022 
 1023 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
 1024 -- We have
 1025 --
 1026 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
 1027 --
 1028 -- and
 1029 --
 1030 -- > spanEnd (not . isSpace) ps
 1031 -- >    == 
 1032 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 
 1033 --
 1034 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 1035 spanEnd  p ps = splitAt (findFromEndUntil (not.p) ps) ps
 1036 
 1037 -- | /O(n)/ Splits a 'ByteString' into components delimited by
 1038 -- separators, where the predicate returns True for a separator element.
 1039 -- The resulting components do not contain the separators.  Two adjacent
 1040 -- separators result in an empty component in the output.  eg.
 1041 --
 1042 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
 1043 -- > splitWith (=='a') []        == []
 1044 --
 1045 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
 1046 
 1047 #if defined(__GLASGOW_HASKELL__)
 1048 splitWith _pred (PS _  _   0) = []
 1049 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
 1050   where pred# c# = pred_ (W8# c#)
 1051 
 1052         STRICT4(splitWith0)
 1053         splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
 1054             splitLoop pred' p 0 off' len' fp'
 1055 
 1056         splitLoop :: (Word# -> Bool)
 1057                   -> Ptr Word8
 1058                   -> Int -> Int -> Int
 1059                   -> ForeignPtr Word8
 1060                   -> IO [ByteString]
 1061 
 1062         splitLoop pred' p idx' off' len' fp'
 1063             | idx' >= len'  = return [PS fp' off' idx']
 1064             | otherwise = do
 1065                 w <- peekElemOff p (off'+idx')
 1066                 if pred' (case w of W8# w# -> w#)
 1067                    then return (PS fp' off' idx' :
 1068                               splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
 1069                    else splitLoop pred' p (idx'+1) off' len' fp'
 1070 {-# INLINE splitWith #-}
 1071 
 1072 #else
 1073 splitWith _ (PS _ _ 0) = []
 1074 splitWith p ps = loop p ps
 1075     where
 1076         STRICT2(loop)
 1077         loop q qs = if null rest then [chunk]
 1078                                  else chunk : loop q (unsafeTail rest)
 1079             where (chunk,rest) = break q qs
 1080 #endif
 1081 
 1082 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
 1083 -- argument, consuming the delimiter. I.e.
 1084 --
 1085 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
 1086 -- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
 1087 -- > split 'x'  "x"          == ["",""]
 1088 -- 
 1089 -- and
 1090 --
 1091 -- > intercalate [c] . split c == id
 1092 -- > split == splitWith . (==)
 1093 -- 
 1094 -- As for all splitting functions in this library, this function does
 1095 -- not copy the substrings, it just constructs new 'ByteStrings' that
 1096 -- are slices of the original.
 1097 --
 1098 split :: Word8 -> ByteString -> [ByteString]
 1099 split _ (PS _ _ 0) = []
 1100 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 1101     let ptr = p `plusPtr` s
 1102 
 1103         STRICT1(loop)        
 1104         loop n =
 1105             let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
 1106                                            w (fromIntegral (l-n))
 1107             in if q == nullPtr
 1108                 then [PS x (s+n) (l-n)]
 1109                 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1)
 1110 
 1111     return (loop 0)
 1112 {-# INLINE split #-}
 1113 
 1114 {-
 1115 -- slower. but stays inside Haskell.
 1116 split _ (PS _  _   0) = []
 1117 split (W8# w#) (PS fp off len) = splitWith' off len fp
 1118     where
 1119         splitWith' off' len' fp' = withPtr fp $ \p ->
 1120             splitLoop p 0 off' len' fp'
 1121 
 1122         splitLoop :: Ptr Word8
 1123                   -> Int -> Int -> Int
 1124                   -> ForeignPtr Word8
 1125                   -> IO [ByteString]
 1126 
 1127         STRICT5(splitLoop)
 1128         splitLoop p idx' off' len' fp'
 1129             | idx' >= len'  = return [PS fp' off' idx']
 1130             | otherwise = do
 1131                 (W8# x#) <- peekElemOff p (off'+idx')
 1132                 if word2Int# w# ==# word2Int# x#
 1133                    then return (PS fp' off' idx' :
 1134                               splitWith' (off'+idx'+1) (len'-idx'-1) fp')
 1135                    else splitLoop p (idx'+1) off' len' fp'
 1136 -}
 1137 
 1138 {-
 1139 -- | Like 'splitWith', except that sequences of adjacent separators are
 1140 -- treated as a single separator. eg.
 1141 -- 
 1142 -- > tokens (=='a') "aabbaca" == ["bb","c"]
 1143 --
 1144 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
 1145 tokens f = P.filter (not.null) . splitWith f
 1146 {-# INLINE tokens #-}
 1147 -}
 1148 
 1149 -- | The 'group' function takes a ByteString and returns a list of
 1150 -- ByteStrings such that the concatenation of the result is equal to the
 1151 -- argument.  Moreover, each sublist in the result contains only equal
 1152 -- elements.  For example,
 1153 --
 1154 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
 1155 --
 1156 -- It is a special case of 'groupBy', which allows the programmer to
 1157 -- supply their own equality test. It is about 40% faster than 
 1158 -- /groupBy (==)/
 1159 group :: ByteString -> [ByteString]
 1160 group xs
 1161     | null xs   = []
 1162     | otherwise = ys : group zs
 1163     where
 1164         (ys, zs) = spanByte (unsafeHead xs) xs
 1165 
 1166 -- | The 'groupBy' function is the non-overloaded version of 'group'.
 1167 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
 1168 groupBy k xs
 1169     | null xs   = []
 1170     | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
 1171     where
 1172         n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
 1173 
 1174 -- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of
 1175 -- 'ByteString's and concatenates the list after interspersing the first
 1176 -- argument between each element of the list.
 1177 intercalate :: ByteString -> [ByteString] -> ByteString
 1178 intercalate s = concat . (List.intersperse s)
 1179 {-# INLINE [1] intercalate #-}
 1180 
 1181 {-# RULES
 1182 "ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 .
 1183     intercalate (singleton c) (s1 : s2 : []) = intercalateWithByte c s1 s2
 1184   #-}
 1185 
 1186 -- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings
 1187 -- with a char. Around 4 times faster than the generalised join.
 1188 --
 1189 intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString
 1190 intercalateWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
 1191     withForeignPtr ffp $ \fp ->
 1192     withForeignPtr fgp $ \gp -> do
 1193         memcpy ptr (fp `plusPtr` s) (fromIntegral l)
 1194         poke (ptr `plusPtr` l) c
 1195         memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
 1196     where
 1197       len = length f + length g + 1
 1198 {-# INLINE intercalateWithByte #-}
 1199 
 1200 -- ---------------------------------------------------------------------
 1201 -- Indexing ByteStrings
 1202 
 1203 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
 1204 index :: ByteString -> Int -> Word8
 1205 index ps n
 1206     | n < 0          = moduleError "index" ("negative index: " ++ show n)
 1207     | n >= length ps = moduleError "index" ("index too large: " ++ show n
 1208                                          ++ ", length = " ++ show (length ps))
 1209     | otherwise      = ps `unsafeIndex` n
 1210 {-# INLINE index #-}
 1211 
 1212 -- | /O(n)/ The 'elemIndex' function returns the index of the first
 1213 -- element in the given 'ByteString' which is equal to the query
 1214 -- element, or 'Nothing' if there is no such element. 
 1215 -- This implementation uses memchr(3).
 1216 elemIndex :: Word8 -> ByteString -> Maybe Int
 1217 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 1218     let p' = p `plusPtr` s
 1219     q <- memchr p' c (fromIntegral l)
 1220     return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p'
 1221 {-# INLINE elemIndex #-}
 1222 
 1223 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
 1224 -- element in the given 'ByteString' which is equal to the query
 1225 -- element, or 'Nothing' if there is no such element. The following
 1226 -- holds:
 1227 --
 1228 -- > elemIndexEnd c xs == 
 1229 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
 1230 --
 1231 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
 1232 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
 1233     go (p `plusPtr` s) (l-1)
 1234   where
 1235     STRICT2(go)                  
 1236     go p i | i < 0     = return Nothing
 1237            | otherwise = do ch' <- peekByteOff p i
 1238                             if ch == ch'
 1239                                 then return $ Just i
 1240                                 else go p (i-1)
 1241 {-# INLINE elemIndexEnd #-}
 1242 
 1243 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
 1244 -- the indices of all elements equal to the query element, in ascending order.
 1245 -- This implementation uses memchr(3).
 1246 elemIndices :: Word8 -> ByteString -> [Int]
 1247 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do
 1248     let ptr = p `plusPtr` s
 1249 
 1250         STRICT1(loop)        
 1251         loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n)
 1252                                                 w (fromIntegral (l - n))
 1253                  in if q == nullPtr
 1254                         then []
 1255                         else let i = q `minusPtr` ptr
 1256                              in i : loop (i+1)
 1257     return $! loop 0
 1258 {-# INLINE elemIndices #-}
 1259 
 1260 {-
 1261 -- much slower
 1262 elemIndices :: Word8 -> ByteString -> [Int]
 1263 elemIndices c ps = loop 0 ps
 1264    where STRICT2(loop)
 1265          loop _ ps' | null ps'            = []
 1266          loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps')
 1267                     | otherwise           = loop (n+1) (unsafeTail ps')
 1268 -}
 1269 
 1270 -- | count returns the number of times its argument appears in the ByteString
 1271 --
 1272 -- > count = length . elemIndices
 1273 --
 1274 -- But more efficiently than using length on the intermediate list.
 1275 count :: Word8 -> ByteString -> Int
 1276 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
 1277     fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w
 1278 {-# INLINE count #-}
 1279 
 1280 {-
 1281 --
 1282 -- around 30% slower
 1283 --
 1284 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p ->
 1285      go (p `plusPtr` s) (fromIntegral m) 0
 1286     where
 1287         go :: Ptr Word8 -> CSize -> Int -> IO Int
 1288         STRICT3(go)
 1289         go p l i = do
 1290             q <- memchr p w l
 1291             if q == nullPtr
 1292                 then return i
 1293                 else do let k = fromIntegral $ q `minusPtr` p
 1294                         go (q `plusPtr` 1) (l-k-1) (i+1)
 1295 -}
 1296 
 1297 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
 1298 -- returns the index of the first element in the ByteString
 1299 -- satisfying the predicate.
 1300 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
 1301 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
 1302   where
 1303     STRICT2(go)                  
 1304     go ptr n | n >= l    = return Nothing
 1305              | otherwise = do w <- peek ptr
 1306                               if k w
 1307                                 then return (Just n)
 1308                                 else go (ptr `plusPtr` 1) (n+1)
 1309 {-# INLINE findIndex #-}
 1310 
 1311 -- | The 'findIndices' function extends 'findIndex', by returning the
 1312 -- indices of all elements satisfying the predicate, in ascending order.
 1313 findIndices :: (Word8 -> Bool) -> ByteString -> [Int]
 1314 findIndices p ps = loop 0 ps
 1315    where
 1316      STRICT2(loop)                  
 1317      loop n qs | null qs           = []
 1318                | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs)
 1319                | otherwise         =     loop (n+1) (unsafeTail qs)
 1320 
 1321 -- ---------------------------------------------------------------------
 1322 -- Searching ByteStrings
 1323 
 1324 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
 1325 elem :: Word8 -> ByteString -> Bool
 1326 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True
 1327 {-# INLINE elem #-}
 1328 
 1329 -- | /O(n)/ 'notElem' is the inverse of 'elem'
 1330 notElem :: Word8 -> ByteString -> Bool
 1331 notElem c ps = not (elem c ps)
 1332 {-# INLINE notElem #-}
 1333 
 1334 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
 1335 -- returns a ByteString containing those characters that satisfy the
 1336 -- predicate. This function is subject to array fusion.
 1337 filter :: (Word8 -> Bool) -> ByteString -> ByteString
 1338 filter k ps@(PS x s l)
 1339     | null ps   = ps
 1340     | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do
 1341         t <- go (f `plusPtr` s) p (f `plusPtr` (s + l))
 1342         return $! t `minusPtr` p -- actual length
 1343     where
 1344         STRICT3(go)                            
 1345         go f t end | f == end  = return t
 1346                    | otherwise = do
 1347                         w <- peek f
 1348                         if k w
 1349                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end
 1350                             else             go (f `plusPtr` 1) t               end
 1351 {-# INLINE filter #-}
 1352 
 1353 {-
 1354 --
 1355 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common
 1356 -- case of filtering a single byte. It is more efficient to use
 1357 -- /filterByte/ in this case.
 1358 --
 1359 -- > filterByte == filter . (==)
 1360 --
 1361 -- filterByte is around 10x faster, and uses much less space, than its
 1362 -- filter equivalent
 1363 --
 1364 filterByte :: Word8 -> ByteString -> ByteString
 1365 filterByte w ps = replicate (count w ps) w
 1366 {-# INLINE filterByte #-}
 1367 
 1368 {-# RULES
 1369 "ByteString specialise filter (== x)" forall x.
 1370     filter ((==) x) = filterByte x
 1371 "ByteString specialise filter (== x)" forall x.
 1372     filter (== x) = filterByte x
 1373   #-}
 1374 -}
 1375 
 1376 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
 1377 -- and returns the first element in matching the predicate, or 'Nothing'
 1378 -- if there is no such element.
 1379 --
 1380 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
 1381 --
 1382 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
 1383 find f p = case findIndex f p of
 1384                     Just n -> Just (p `unsafeIndex` n)
 1385                     _      -> Nothing
 1386 {-# INLINE find #-}
 1387 
 1388 {-
 1389 --
 1390 -- fuseable, but we don't want to walk the whole array.
 1391 -- 
 1392 find k = foldl findEFL Nothing
 1393     where findEFL a@(Just _) _ = a
 1394           findEFL _          c | k c       = Just c
 1395                                | otherwise = Nothing
 1396 -}
 1397 
 1398 -- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns
 1399 -- the pair of ByteStrings with elements which do and do not satisfy the
 1400 -- predicate, respectively; i.e.,
 1401 --
 1402 -- > partition p bs == (filter p xs, filter (not . p) xs)
 1403 --
 1404 partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 1405 partition p bs = (filter p bs, filter (not . p) bs)
 1406 --TODO: use a better implementation
 1407 
 1408 -- ---------------------------------------------------------------------
 1409 -- Searching for substrings
 1410 
 1411 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
 1412 -- iff the first is a prefix of the second.
 1413 isPrefixOf :: ByteString -> ByteString -> Bool
 1414 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2)
 1415     | l1 == 0   = True
 1416     | l2 < l1   = False
 1417     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
 1418         withForeignPtr x2 $ \p2 -> do
 1419             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1)
 1420             return $! i == 0
 1421 
 1422 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
 1423 -- iff the first is a suffix of the second.
 1424 -- 
 1425 -- The following holds:
 1426 --
 1427 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
 1428 --
 1429 -- However, the real implemenation uses memcmp to compare the end of the
 1430 -- string only, with no reverse required..
 1431 isSuffixOf :: ByteString -> ByteString -> Bool
 1432 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2)
 1433     | l1 == 0   = True
 1434     | l2 < l1   = False
 1435     | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 ->
 1436         withForeignPtr x2 $ \p2 -> do
 1437             i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1)
 1438             return $! i == 0
 1439 
 1440 -- | Check whether one string is a substring of another. @isInfixOf
 1441 -- p s@ is equivalent to @not (null (findSubstrings p s))@.
 1442 isInfixOf :: ByteString -> ByteString -> Bool
 1443 isInfixOf p s = isJust (findSubstring p s)
 1444 
 1445 -- | Break a string on a substring, returning a pair of the part of the
 1446 -- string prior to the match, and the rest of the string.
 1447 --
 1448 -- The following relationships hold:
 1449 --
 1450 -- > break (== c) l == breakSubstring (singleton c) l
 1451 --
 1452 -- and:
 1453 --
 1454 -- > findSubstring s l ==
 1455 -- >    if null s then Just 0
 1456 -- >              else case breakSubstring s l of
 1457 -- >                       (x,y) | null y    -> Nothing
 1458 -- >                             | otherwise -> Just (length x)
 1459 --
 1460 -- For example, to tokenise a string, dropping delimiters:
 1461 --
 1462 -- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
 1463 -- >     where (h,t) = breakSubstring x y
 1464 --
 1465 -- To skip to the first occurence of a string:
 1466 -- 
 1467 -- > snd (breakSubstring x y) 
 1468 --
 1469 -- To take the parts of a string before a delimiter:
 1470 --
 1471 -- > fst (breakSubstring x y) 
 1472 --
 1473 breakSubstring :: ByteString -- ^ String to search for
 1474                -> ByteString -- ^ String to search in
 1475                -> (ByteString,ByteString) -- ^ Head and tail of string broken at substring
 1476 
 1477 breakSubstring pat src = search 0 src
 1478   where
 1479     STRICT2(search)                  
 1480     search n s
 1481         | null s             = (src,empty)      -- not found
 1482         | pat `isPrefixOf` s = (take n src,s)
 1483         | otherwise          = search (n+1) (unsafeTail s)
 1484 
 1485 -- | Get the first index of a substring in another string,
 1486 --   or 'Nothing' if the string is not found.
 1487 --   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
 1488 findSubstring :: ByteString -- ^ String to search for.
 1489               -> ByteString -- ^ String to seach in.
 1490               -> Maybe Int
 1491 findSubstring f i = listToMaybe (findSubstrings f i)
 1492 
 1493 {-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-}
 1494 
 1495 {-
 1496 findSubstring pat str = search 0 str
 1497     where
 1498         STRICT2(search)
 1499         search n s
 1500             = let x = pat `isPrefixOf` s
 1501               in
 1502                 if null s
 1503                     then if x then Just n else Nothing
 1504                     else if x then Just n
 1505                               else     search (n+1) (unsafeTail s)
 1506 -}
 1507 
 1508 -- | Find the indexes of all (possibly overlapping) occurances of a
 1509 -- substring in a string.
 1510 --
 1511 findSubstrings :: ByteString -- ^ String to search for.
 1512                -> ByteString -- ^ String to seach in.
 1513                -> [Int]
 1514 findSubstrings pat str
 1515     | null pat         = [0 .. length str]
 1516     | otherwise        = search 0 str
 1517   where
 1518     STRICT2(search)                  
 1519     search n s
 1520         | null s             = []
 1521         | pat `isPrefixOf` s = n : search (n+1) (unsafeTail s)
 1522         | otherwise          =     search (n+1) (unsafeTail s)
 1523 
 1524 {-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-}
 1525 
 1526 {-
 1527 {- This function uses the Knuth-Morris-Pratt string matching algorithm.  -}
 1528 
 1529 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0
 1530   where
 1531       patc x = pat `unsafeIndex` x
 1532       strc x = str `unsafeIndex` x
 1533 
 1534       -- maybe we should make kmpNext a UArray before using it in search?
 1535       kmpNext = listArray (0,m) (-1:kmpNextL pat (-1))
 1536       kmpNextL p _ | null p = []
 1537       kmpNextL p j = let j' = next (unsafeHead p) j + 1
 1538                          ps = unsafeTail p
 1539                          x = if not (null ps) && unsafeHead ps == patc j'
 1540                                 then kmpNext Array.! j' else j'
 1541                         in x:kmpNextL ps j'
 1542       search i j = match ++ rest -- i: position in string, j: position in pattern
 1543         where match = if j == m then [(i - j)] else []
 1544               rest = if i == n then [] else search (i+1) (next (strc i) j + 1)
 1545       next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j)
 1546                | otherwise = j
 1547 -}
 1548 
 1549 -- ---------------------------------------------------------------------
 1550 -- Zipping
 1551 
 1552 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
 1553 -- corresponding pairs of bytes. If one input ByteString is short,
 1554 -- excess elements of the longer ByteString are discarded. This is
 1555 -- equivalent to a pair of 'unpack' operations.
 1556 zip :: ByteString -> ByteString -> [(Word8,Word8)]
 1557 zip ps qs
 1558     | null ps || null qs = []
 1559     | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs)
 1560 
 1561 -- | 'zipWith' generalises 'zip' by zipping with the function given as
 1562 -- the first argument, instead of a tupling function.  For example,
 1563 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
 1564 -- corresponding sums. 
 1565 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
 1566 zipWith f ps qs
 1567     | null ps || null qs = []
 1568     | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
 1569 #if defined(__GLASGOW_HASKELL__)
 1570 {-# INLINE [1] zipWith #-}
 1571 #endif
 1572 
 1573 --
 1574 -- | A specialised version of zipWith for the common case of a
 1575 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
 1576 -- are used to automatically covert zipWith into zipWith' when a pack is
 1577 -- performed on the result of zipWith.
 1578 --
 1579 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
 1580 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $
 1581     withForeignPtr fp $ \a ->
 1582     withForeignPtr fq $ \b ->
 1583     create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t)
 1584   where
 1585     zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
 1586     STRICT4(zipWith_)
 1587     zipWith_ n p1 p2 r
 1588        | n >= len = return ()
 1589        | otherwise = do
 1590             x <- peekByteOff p1 n
 1591             y <- peekByteOff p2 n
 1592             pokeByteOff r n (f x y)
 1593             zipWith_ (n+1) p1 p2 r
 1594 
 1595     len = min l m
 1596 {-# INLINE zipWith' #-}
 1597 
 1598 {-# RULES
 1599 "ByteString specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
 1600     zipWith f p q = unpack (zipWith' f p q)
 1601   #-}
 1602 
 1603 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
 1604 -- ByteStrings. Note that this performs two 'pack' operations.
 1605 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
 1606 unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
 1607 {-# INLINE unzip #-}
 1608 
 1609 -- ---------------------------------------------------------------------
 1610 -- Special lists
 1611 
 1612 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
 1613 inits :: ByteString -> [ByteString]
 1614 inits (PS x s l) = [PS x s n | n <- [0..l]]
 1615 
 1616 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
 1617 tails :: ByteString -> [ByteString]
 1618 tails p | null p    = [empty]
 1619         | otherwise = p : tails (unsafeTail p)
 1620 
 1621 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]]
 1622 
 1623 -- ---------------------------------------------------------------------
 1624 -- ** Ordered 'ByteString's
 1625 
 1626 -- | /O(n)/ Sort a ByteString efficiently, using counting sort.
 1627 sort :: ByteString -> ByteString
 1628 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
 1629 
 1630     memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
 1631     withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l)
 1632 
 1633     let STRICT2(go)                  
 1634         go 256 _   = return ()
 1635         go i   ptr = do n <- peekElemOff arr i
 1636                         when (n /= 0) $ memset ptr (fromIntegral i) n >> return ()
 1637                         go (i + 1) (ptr `plusPtr` (fromIntegral n))
 1638     go 0 p
 1639   where
 1640     -- | Count the number of occurrences of each byte.
 1641     -- Used by 'sort'
 1642     --
 1643     countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO ()
 1644     STRICT3(countOccurrences)                            
 1645     countOccurrences counts str len = go 0
 1646      where
 1647         STRICT1(go)        
 1648         go i | i == len    = return ()
 1649              | otherwise = do k <- fromIntegral `fmap` peekElemOff str i
 1650                               x <- peekElemOff counts k
 1651                               pokeElemOff counts k (x + 1)
 1652                               go (i + 1)
 1653 
 1654 {-
 1655 sort :: ByteString -> ByteString
 1656 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do
 1657         memcpy p (f `plusPtr` s) l
 1658         c_qsort p l -- inplace
 1659 -}
 1660 
 1661 -- The 'sortBy' function is the non-overloaded version of 'sort'.
 1662 --
 1663 -- Try some linear sorts: radix, counting
 1664 -- Or mergesort.
 1665 --
 1666 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString
 1667 -- sortBy f ps = undefined
 1668 
 1669 -- ---------------------------------------------------------------------
 1670 -- Low level constructors
 1671 
 1672 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a
 1673 -- null-terminated @CString@.  The @CString@ will be freed
 1674 -- automatically. This is a memcpy(3).
 1675 useAsCString :: ByteString -> (CString -> IO a) -> IO a
 1676 useAsCString (PS fp o l) action = do
 1677  allocaBytes (l+1) $ \buf ->
 1678    withForeignPtr fp $ \p -> do
 1679      memcpy buf (p `plusPtr` o) (fromIntegral l)
 1680      pokeByteOff buf l (0::Word8)
 1681      action (castPtr buf)
 1682 
 1683 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a @CStringLen@.
 1684 -- As for @useAsCString@ this function makes a copy of the original @ByteString@.
 1685 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a
 1686 useAsCStringLen p@(PS _ _ l) f = useAsCString p $ \cstr -> f (cstr,l)
 1687 
 1688 ------------------------------------------------------------------------
 1689 
 1690 -- | /O(n)./ Construct a new @ByteString@ from a @CString@. The
 1691 -- resulting @ByteString@ is an immutable copy of the original
 1692 -- @CString@, and is managed on the Haskell heap. The original
 1693 -- @CString@ must be null terminated.
 1694 packCString :: CString -> IO ByteString
 1695 packCString cstr = do
 1696     len <- c_strlen cstr
 1697     packCStringLen (cstr, fromIntegral len)
 1698 
 1699 -- | /O(n)./ Construct a new @ByteString@ from a @CStringLen@. The
 1700 -- resulting @ByteString@ is an immutable copy of the original @CStringLen@.
 1701 -- The @ByteString@ is a normal Haskell value and will be managed on the
 1702 -- Haskell heap.
 1703 packCStringLen :: CStringLen -> IO ByteString
 1704 packCStringLen (cstr, len) = create len $ \p ->
 1705     memcpy p (castPtr cstr) (fromIntegral len)
 1706 
 1707 ------------------------------------------------------------------------
 1708 
 1709 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage. 
 1710 -- This is mainly useful to allow the rest of the data pointed
 1711 -- to by the 'ByteString' to be garbage collected, for example
 1712 -- if a large string has been read in, and only a small part of it 
 1713 -- is needed in the rest of the program.
 1714 -- 
 1715 copy :: ByteString -> ByteString
 1716 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
 1717     memcpy p (f `plusPtr` s) (fromIntegral l)
 1718 
 1719 -- ---------------------------------------------------------------------
 1720 -- Line IO
 1721 
 1722 -- | Read a line from stdin.
 1723 getLine :: IO ByteString
 1724 getLine = hGetLine stdin
 1725 
 1726 -- | Read a line from a handle
 1727 
 1728 hGetLine :: Handle -> IO ByteString
 1729 #if !defined(__GLASGOW_HASKELL__)
 1730 hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w
 1731 #else
 1732 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do
 1733     case haBufferMode handle_ of
 1734        NoBuffering -> error "no buffering"
 1735        _other      -> hGetLineBuffered handle_
 1736 
 1737  where
 1738     hGetLineBuffered handle_ = do
 1739         let ref = haBuffer handle_
 1740         buf <- readIORef ref
 1741         hGetLineBufferedLoop handle_ ref buf 0 []
 1742 
 1743     hGetLineBufferedLoop handle_ ref
 1744             buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss =
 1745         len `seq` do
 1746         off <- findEOL r w raw
 1747         let new_len = len + off - r
 1748         xs <- mkPS raw r off
 1749 
 1750       -- if eol == True, then off is the offset of the '\n'
 1751       -- otherwise off == w and the buffer is now empty.
 1752         if off /= w
 1753             then do if (w == off + 1)
 1754                             then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
 1755                             else writeIORef ref buf{ bufRPtr = off + 1 }
 1756                     mkBigPS new_len (xs:xss)
 1757             else do
 1758                  maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_)
 1759                                     buf{ bufWPtr=0, bufRPtr=0 }
 1760                  case maybe_buf of
 1761                     -- Nothing indicates we caught an EOF, and we may have a
 1762                     -- partial line to return.
 1763                     Nothing -> do
 1764                          writeIORef ref buf{ bufRPtr=0, bufWPtr=0 }
 1765                          if new_len > 0
 1766                             then mkBigPS new_len (xs:xss)
 1767                             else ioe_EOF
 1768                     Just new_buf ->
 1769                          hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss)
 1770 
 1771     -- find the end-of-line character, if there is one
 1772     findEOL r w raw
 1773         | r == w = return w
 1774         | otherwise =  do
 1775             (c,r') <- readCharFromBuffer raw r
 1776             if c == '\n'
 1777                 then return r -- NB. not r': don't include the '\n'
 1778                 else findEOL r' w raw
 1779 
 1780     maybeFillReadBuffer fd is_line is_stream buf = catch
 1781         (do buf' <- fillReadBuffer fd is_line is_stream buf
 1782             return (Just buf'))
 1783         (\e -> if isEOFError e then return Nothing else ioError e)
 1784 
 1785 -- TODO, rewrite to use normal memcpy
 1786 mkPS :: RawBuffer -> Int -> Int -> IO ByteString
 1787 mkPS buf start end =
 1788     let len = end - start
 1789     in create len $ \p -> do
 1790         memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len)
 1791         return ()
 1792 
 1793 mkBigPS :: Int -> [ByteString] -> IO ByteString
 1794 mkBigPS _ [ps] = return ps
 1795 mkBigPS _ pss = return $! concat (P.reverse pss)
 1796 
 1797 #endif
 1798 
 1799 -- ---------------------------------------------------------------------
 1800 -- Block IO
 1801 
 1802 -- | Outputs a 'ByteString' to the specified 'Handle'.
 1803 hPut :: Handle -> ByteString -> IO ()
 1804 hPut _ (PS _  _ 0) = return ()
 1805 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l
 1806 
 1807 -- | A synonym for @hPut@, for compatibility 
 1808 hPutStr :: Handle -> ByteString -> IO ()
 1809 hPutStr = hPut
 1810 
 1811 -- | Write a ByteString to a handle, appending a newline byte
 1812 hPutStrLn :: Handle -> ByteString -> IO ()
 1813 hPutStrLn h ps
 1814     | length ps < 1024 = hPut h (ps `snoc` 0x0a)
 1815     | otherwise        = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy
 1816 
 1817 -- | Write a ByteString to stdout
 1818 putStr :: ByteString -> IO ()
 1819 putStr = hPut stdout
 1820 
 1821 -- | Write a ByteString to stdout, appending a newline byte
 1822 putStrLn :: ByteString -> IO ()
 1823 putStrLn = hPutStrLn stdout
 1824 
 1825 ------------------------------------------------------------------------
 1826 -- Low level IO
 1827 
 1828 -- | Read a 'ByteString' directly from the specified 'Handle'.  This
 1829 -- is far more efficient than reading the characters into a 'String'
 1830 -- and then using 'pack'. First argument is the Handle to read from, 
 1831 -- and the second is the number of bytes to read.
 1832 --
 1833 hGet :: Handle -> Int -> IO ByteString
 1834 hGet _ 0 = return empty
 1835 hGet h i = createAndTrim i $ \p -> hGetBuf h p i
 1836 
 1837 -- | hGetNonBlocking is identical to 'hGet', except that it will never block
 1838 -- waiting for data to become available, instead it returns only whatever data
 1839 -- is available.
 1840 --
 1841 hGetNonBlocking :: Handle -> Int -> IO ByteString
 1842 #if defined(__GLASGOW_HASKELL__)
 1843 hGetNonBlocking _ 0 = return empty
 1844 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i
 1845 #else
 1846 hGetNonBlocking = hGet
 1847 #endif
 1848 
 1849 -- | Read entire handle contents strictly into a 'ByteString'.
 1850 --
 1851 -- This function reads chunks at a time, doubling the chunksize on each
 1852 -- read. The final buffer is then realloced to the appropriate size. For
 1853 -- files > half of available memory, this may lead to memory exhaustion.
 1854 -- Consider using 'readFile' in this case.
 1855 --
 1856 -- As with 'hGet', the string representation in the file is assumed to
 1857 -- be ISO-8859-1.
 1858 --
 1859 -- The Handle is closed once the contents have been read,
 1860 -- or if an exception is thrown.
 1861 --
 1862 hGetContents :: Handle -> IO ByteString
 1863 hGetContents h = always (hClose h) $ do -- strict, so hClose
 1864     let start_size = 1024
 1865     p <- mallocBytes start_size
 1866     i <- hGetBuf h p start_size
 1867     if i < start_size
 1868         then do p' <- reallocBytes p i
 1869                 fp <- newForeignPtr finalizerFree p'
 1870                 return $! PS fp 0 i
 1871         else f p start_size
 1872     where
 1873         always = flip finally
 1874         f p s = do
 1875             let s' = 2 * s
 1876             p' <- reallocBytes p s'
 1877             i  <- hGetBuf h (p' `plusPtr` s) s
 1878             if i < s
 1879                 then do let i' = s + i
 1880                         p'' <- reallocBytes p' i'
 1881                         fp  <- newForeignPtr finalizerFree p''
 1882                         return $! PS fp 0 i'
 1883                 else f p' s'
 1884 
 1885 -- | getContents. Read stdin strictly. Equivalent to hGetContents stdin
 1886 -- The 'Handle' is closed after the contents have been read.
 1887 --
 1888 getContents :: IO ByteString
 1889 getContents = hGetContents stdin
 1890 
 1891 -- | The interact function takes a function of type @ByteString -> ByteString@
 1892 -- as its argument. The entire input from the standard input device is passed
 1893 -- to this function as its argument, and the resulting string is output on the
 1894 -- standard output device.
 1895 --
 1896 interact :: (ByteString -> ByteString) -> IO ()
 1897 interact transformer = putStr . transformer =<< getContents
 1898 
 1899 -- | Read an entire file strictly into a 'ByteString'.  This is far more
 1900 -- efficient than reading the characters into a 'String' and then using
 1901 -- 'pack'.  It also may be more efficient than opening the file and
 1902 -- reading it using hGet. Files are read using 'binary mode' on Windows,
 1903 -- for 'text mode' use the Char8 version of this function.
 1904 --
 1905 readFile :: FilePath -> IO ByteString
 1906 readFile f = bracket (openBinaryFile f ReadMode) hClose
 1907     (\h -> hFileSize h >>= hGet h . fromIntegral)
 1908 
 1909 -- | Write a 'ByteString' to a file.
 1910 writeFile :: FilePath -> ByteString -> IO ()
 1911 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
 1912     (\h -> hPut h txt)
 1913 
 1914 -- | Append a 'ByteString' to a file.
 1915 appendFile :: FilePath -> ByteString -> IO ()
 1916 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
 1917     (\h -> hPut h txt)
 1918 
 1919 -- ---------------------------------------------------------------------
 1920 -- Internal utilities
 1921 
 1922 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
 1923 -- of the string if no element is found, rather than Nothing.
 1924 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
 1925 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
 1926   where
 1927     STRICT2(go)                  
 1928     go ptr n | n >= l    = return l
 1929              | otherwise = do w <- peek ptr
 1930                               if k w
 1931                                 then return n
 1932                                 else go (ptr `plusPtr` 1) (n+1)
 1933 {-# INLINE findIndexOrEnd #-}
 1934 
 1935 -- | Perform an operation with a temporary ByteString
 1936 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b
 1937 withPtr fp io = inlinePerformIO (withForeignPtr fp io)
 1938 {-# INLINE withPtr #-}
 1939 
 1940 -- Common up near identical calls to `error' to reduce the number
 1941 -- constant strings created when compiled:
 1942 errorEmptyList :: String -> a
 1943 errorEmptyList fun = moduleError fun "empty ByteString"
 1944 {-# NOINLINE errorEmptyList #-}
 1945 
 1946 moduleError :: String -> String -> a
 1947 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg)
 1948 {-# NOINLINE moduleError #-}
 1949 
 1950 -- Find from the end of the string using predicate
 1951 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int
 1952 STRICT2(findFromEndUntil)                  
 1953 findFromEndUntil f ps@(PS x s l) =
 1954     if null ps then 0
 1955     else if f (last ps) then l
 1956          else findFromEndUntil f (PS x s (l-1))