1 {-# LANGUAGE CPP #-}
    2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
    3 
    4 -- #prune
    5 
    6 -- |
    7 -- Module      : Data.ByteString.Lazy
    8 -- Copyright   : (c) Don Stewart 2006
    9 --               (c) Duncan Coutts 2006
   10 -- License     : BSD-style
   11 --
   12 -- Maintainer  : dons@galois.com
   13 -- Stability   : experimental
   14 -- Portability : portable
   15 -- 
   16 -- A time and space-efficient implementation of lazy byte vectors
   17 -- using lists of packed 'Word8' arrays, suitable for high performance
   18 -- use, both in terms of large data quantities, or high speed
   19 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8'
   20 -- arrays of bytes. They provide a means to manipulate large byte vectors
   21 -- without requiring the entire vector be resident in memory.
   22 --
   23 -- Some operations, such as concat, append, reverse and cons, have
   24 -- better complexity than their "Data.ByteString" equivalents, due to
   25 -- optimisations resulting from the list spine structure. And for other
   26 -- operations lazy ByteStrings are usually within a few percent of
   27 -- strict ones, but with better heap usage. For data larger than the
   28 -- available memory, or if you have tight memory constraints, this
   29 -- module will be the only option. The default chunk size is 64k, which
   30 -- should be good in most circumstances. For people with large L2
   31 -- caches, you may want to increase this to fit your cache.
   32 --
   33 -- This module is intended to be imported @qualified@, to avoid name
   34 -- clashes with "Prelude" functions.  eg.
   35 --
   36 -- > import qualified Data.ByteString.Lazy as B
   37 --
   38 -- Original GHC implementation by Bryan O\'Sullivan.
   39 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
   40 -- Rewritten to support slices and use 'Foreign.ForeignPtr.ForeignPtr'
   41 -- by David Roundy.
   42 -- Polished and extended by Don Stewart.
   43 -- Lazy variant by Duncan Coutts and Don Stewart.
   44 --
   45 
   46 module Data.ByteString.Lazy (
   47 
   48         -- * The @ByteString@ type
   49         ByteString,             -- instances: Eq, Ord, Show, Read, Data, Typeable
   50 
   51         -- * Introducing and eliminating 'ByteString's
   52         empty,                  -- :: ByteString
   53         singleton,              -- :: Word8   -> ByteString
   54         pack,                   -- :: [Word8] -> ByteString
   55         unpack,                 -- :: ByteString -> [Word8]
   56         fromChunks,             -- :: [Strict.ByteString] -> ByteString
   57         toChunks,               -- :: ByteString -> [Strict.ByteString]
   58 
   59         -- * Basic interface
   60         cons,                   -- :: Word8 -> ByteString -> ByteString
   61         cons',                  -- :: Word8 -> ByteString -> ByteString
   62         snoc,                   -- :: ByteString -> Word8 -> ByteString
   63         append,                 -- :: ByteString -> ByteString -> ByteString
   64         head,                   -- :: ByteString -> Word8
   65         uncons,                 -- :: ByteString -> Maybe (Word8, ByteString)
   66         last,                   -- :: ByteString -> Word8
   67         tail,                   -- :: ByteString -> ByteString
   68         init,                   -- :: ByteString -> ByteString
   69         null,                   -- :: ByteString -> Bool
   70         length,                 -- :: ByteString -> Int64
   71 
   72         -- * Transforming ByteStrings
   73         map,                    -- :: (Word8 -> Word8) -> ByteString -> ByteString
   74         reverse,                -- :: ByteString -> ByteString
   75         intersperse,            -- :: Word8 -> ByteString -> ByteString
   76         intercalate,            -- :: ByteString -> [ByteString] -> ByteString
   77         transpose,              -- :: [ByteString] -> [ByteString]
   78 
   79         -- * Reducing 'ByteString's (folds)
   80         foldl,                  -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   81         foldl',                 -- :: (a -> Word8 -> a) -> a -> ByteString -> a
   82         foldl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   83         foldl1',                -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   84         foldr,                  -- :: (Word8 -> a -> a) -> a -> ByteString -> a
   85         foldr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
   86 
   87         -- ** Special folds
   88         concat,                 -- :: [ByteString] -> ByteString
   89         concatMap,              -- :: (Word8 -> ByteString) -> ByteString -> ByteString
   90         any,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
   91         all,                    -- :: (Word8 -> Bool) -> ByteString -> Bool
   92         maximum,                -- :: ByteString -> Word8
   93         minimum,                -- :: ByteString -> Word8
   94 
   95         -- * Building ByteStrings
   96         -- ** Scans
   97         scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
   98 --        scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
   99 --        scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  100 --        scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  101 
  102         -- ** Accumulating maps
  103         mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  104         mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  105 
  106         -- ** Infinite ByteStrings
  107         repeat,                 -- :: Word8 -> ByteString
  108         replicate,              -- :: Int64 -> Word8 -> ByteString
  109         cycle,                  -- :: ByteString -> ByteString
  110         iterate,                -- :: (Word8 -> Word8) -> Word8 -> ByteString
  111 
  112         -- ** Unfolding ByteStrings
  113         unfoldr,                -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  114 
  115         -- * Substrings
  116 
  117         -- ** Breaking strings
  118         take,                   -- :: Int64 -> ByteString -> ByteString
  119         drop,                   -- :: Int64 -> ByteString -> ByteString
  120         splitAt,                -- :: Int64 -> ByteString -> (ByteString, ByteString)
  121         takeWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  122         dropWhile,              -- :: (Word8 -> Bool) -> ByteString -> ByteString
  123         span,                   -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  124         break,                  -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  125         group,                  -- :: ByteString -> [ByteString]
  126         groupBy,                -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  127         inits,                  -- :: ByteString -> [ByteString]
  128         tails,                  -- :: ByteString -> [ByteString]
  129 
  130         -- ** Breaking into many substrings
  131         split,                  -- :: Word8 -> ByteString -> [ByteString]
  132         splitWith,              -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
  133 
  134         -- * Predicates
  135         isPrefixOf,             -- :: ByteString -> ByteString -> Bool
  136         isSuffixOf,             -- :: ByteString -> ByteString -> Bool
  137 --        isInfixOf,              -- :: ByteString -> ByteString -> Bool
  138 
  139         -- ** Search for arbitrary substrings
  140 --        isSubstringOf,          -- :: ByteString -> ByteString -> Bool
  141 --        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
  142 --        findSubstrings,         -- :: ByteString -> ByteString -> [Int]
  143 
  144         -- * Searching ByteStrings
  145 
  146         -- ** Searching by equality
  147         elem,                   -- :: Word8 -> ByteString -> Bool
  148         notElem,                -- :: Word8 -> ByteString -> Bool
  149 
  150         -- ** Searching with a predicate
  151         find,                   -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
  152         filter,                 -- :: (Word8 -> Bool) -> ByteString -> ByteString
  153         partition,              -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  154 
  155         -- * Indexing ByteStrings
  156         index,                  -- :: ByteString -> Int64 -> Word8
  157         elemIndex,              -- :: Word8 -> ByteString -> Maybe Int64
  158         elemIndices,            -- :: Word8 -> ByteString -> [Int64]
  159         findIndex,              -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64
  160         findIndices,            -- :: (Word8 -> Bool) -> ByteString -> [Int64]
  161         count,                  -- :: Word8 -> ByteString -> Int64
  162 
  163         -- * Zipping and unzipping ByteStrings
  164         zip,                    -- :: ByteString -> ByteString -> [(Word8,Word8)]
  165         zipWith,                -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
  166         unzip,                  -- :: [(Word8,Word8)] -> (ByteString,ByteString)
  167 
  168         -- * Ordered ByteStrings
  169 --        sort,                   -- :: ByteString -> ByteString
  170 
  171         -- * Low level conversions
  172         -- ** Copying ByteStrings
  173         copy,                   -- :: ByteString -> ByteString
  174 --        defrag,                -- :: ByteString -> ByteString
  175 
  176         -- * I\/O with 'ByteString's
  177 
  178         -- ** Standard input and output
  179         getContents,            -- :: IO ByteString
  180         putStr,                 -- :: ByteString -> IO ()
  181         putStrLn,               -- :: ByteString -> IO ()
  182         interact,               -- :: (ByteString -> ByteString) -> IO ()
  183 
  184         -- ** Files
  185         readFile,               -- :: FilePath -> IO ByteString
  186         writeFile,              -- :: FilePath -> ByteString -> IO ()
  187         appendFile,             -- :: FilePath -> ByteString -> IO ()
  188 
  189         -- ** I\/O with Handles
  190         hGetContents,           -- :: Handle -> IO ByteString
  191         hGet,                   -- :: Handle -> Int -> IO ByteString
  192         hGetNonBlocking,        -- :: Handle -> Int -> IO ByteString
  193         hPut,                   -- :: Handle -> ByteString -> IO ()
  194         hPutStr,                -- :: Handle -> ByteString -> IO ()
  195 
  196   ) where
  197 
  198 import qualified Prelude
  199 import Prelude hiding
  200     (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines
  201     ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum
  202     ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1
  203     ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate
  204     ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
  205 
  206 import qualified Data.List              as L  -- L for list/lazy
  207 import qualified Data.ByteString        as S  -- S for strict (hmm...)
  208 import qualified Data.ByteString.Internal as S
  209 import qualified Data.ByteString.Unsafe as S
  210 import Data.ByteString.Lazy.Internal
  211 
  212 import Data.Monoid              (Monoid(..))
  213 
  214 import Data.Word                (Word8)
  215 import Data.Int                 (Int64)
  216 import System.IO                (Handle,stdin,stdout,openBinaryFile,IOMode(..)
  217                                 ,hClose,hWaitForInput,hIsEOF)
  218 import System.IO.Unsafe
  219 #ifndef __NHC__
  220 import Control.Exception        (bracket)
  221 #else
  222 import IO                     (bracket)
  223 #endif
  224 
  225 import Foreign.ForeignPtr       (withForeignPtr)
  226 import Foreign.Ptr
  227 import Foreign.Storable
  228 
  229 -- -----------------------------------------------------------------------------
  230 --
  231 -- Useful macros, until we have bang patterns
  232 --
  233 
  234 #define STRICT1(f) f a | a `seq` False = undefined
  235 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
  236 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
  237 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
  238 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
  239 
  240 -- -----------------------------------------------------------------------------
  241 
  242 instance Eq  ByteString
  243     where (==)    = eq
  244 
  245 instance Ord ByteString
  246     where compare = cmp
  247 
  248 instance Monoid ByteString where
  249     mempty  = empty
  250     mappend = append
  251     mconcat = concat
  252 
  253 eq :: ByteString -> ByteString -> Bool
  254 eq Empty Empty = True
  255 eq Empty _     = False
  256 eq _     Empty = False
  257 eq (Chunk a as) (Chunk b bs) =
  258   case compare (S.length a) (S.length b) of
  259     LT -> a == (S.take (S.length a) b) && eq as (Chunk (S.drop (S.length a) b) bs)
  260     EQ -> a == b                       && eq as bs
  261     GT -> (S.take (S.length b) a) == b && eq (Chunk (S.drop (S.length b) a) as) bs
  262 
  263 cmp :: ByteString -> ByteString -> Ordering
  264 cmp Empty Empty = EQ
  265 cmp Empty _     = LT
  266 cmp _     Empty = GT
  267 cmp (Chunk a as) (Chunk b bs) =
  268   case compare (S.length a) (S.length b) of
  269     LT -> case compare a (S.take (S.length a) b) of
  270             EQ     -> cmp as (Chunk (S.drop (S.length a) b) bs)
  271             result -> result
  272     EQ -> case compare a b of
  273             EQ     -> cmp as bs
  274             result -> result
  275     GT -> case compare (S.take (S.length b) a) b of
  276             EQ     -> cmp (Chunk (S.drop (S.length b) a) as) bs
  277             result -> result
  278 
  279 -- -----------------------------------------------------------------------------
  280 -- Introducing and eliminating 'ByteString's
  281 
  282 -- | /O(1)/ The empty 'ByteString'
  283 empty :: ByteString
  284 empty = Empty
  285 {-# INLINE empty #-}
  286 
  287 -- | /O(1)/ Convert a 'Word8' into a 'ByteString'
  288 singleton :: Word8 -> ByteString
  289 singleton w = Chunk (S.singleton w) Empty
  290 {-# INLINE singleton #-}
  291 
  292 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. 
  293 pack :: [Word8] -> ByteString
  294 pack ws = L.foldr (Chunk . S.pack) Empty (chunks defaultChunkSize ws)
  295   where
  296     chunks :: Int -> [a] -> [[a]]
  297     chunks _    [] = []
  298     chunks size xs = case L.splitAt size xs of
  299                       (xs', xs'') -> xs' : chunks size xs''
  300 
  301 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
  302 unpack :: ByteString -> [Word8]
  303 unpack cs = L.concatMap S.unpack (toChunks cs)
  304 --TODO: we can do better here by integrating the concat with the unpack
  305 
  306 -- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString'
  307 fromChunks :: [S.ByteString] -> ByteString
  308 fromChunks cs = L.foldr chunk Empty cs
  309 
  310 -- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString'
  311 toChunks :: ByteString -> [S.ByteString]
  312 toChunks cs = foldrChunks (:) [] cs
  313 
  314 ------------------------------------------------------------------------
  315 
  316 {-
  317 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some
  318 -- conversion function
  319 packWith :: (a -> Word8) -> [a] -> ByteString
  320 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str)
  321 {-# INLINE packWith #-}
  322 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-}
  323 
  324 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function.
  325 unpackWith :: (Word8 -> a) -> ByteString -> [a]
  326 unpackWith k (LPS ss) = L.concatMap (S.unpackWith k) ss
  327 {-# INLINE unpackWith #-}
  328 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-}
  329 -}
  330 
  331 -- ---------------------------------------------------------------------
  332 -- Basic interface
  333 
  334 -- | /O(1)/ Test whether a ByteString is empty.
  335 null :: ByteString -> Bool
  336 null Empty = True
  337 null _     = False
  338 {-# INLINE null #-}
  339 
  340 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64'
  341 length :: ByteString -> Int64
  342 length cs = foldlChunks (\n c -> n + fromIntegral (S.length c)) 0 cs
  343 {-# INLINE length #-}
  344 
  345 -- | /O(1)/ 'cons' is analogous to '(:)' for lists.
  346 --
  347 cons :: Word8 -> ByteString -> ByteString
  348 cons c cs = Chunk (S.singleton c) cs
  349 {-# INLINE cons #-}
  350 
  351 -- | /O(1)/ Unlike 'cons', 'cons\'' is
  352 -- strict in the ByteString that we are consing onto. More precisely, it forces
  353 -- the head and the first chunk. It does this because, for space efficiency, it
  354 -- may coalesce the new byte onto the first \'chunk\' rather than starting a
  355 -- new \'chunk\'.
  356 --
  357 -- So that means you can't use a lazy recursive contruction like this:
  358 --
  359 -- > let xs = cons\' c xs in xs
  360 --
  361 -- You can however use 'cons', as well as 'repeat' and 'cycle', to build
  362 -- infinite lazy ByteStrings.
  363 --
  364 cons' :: Word8 -> ByteString -> ByteString
  365 cons' w (Chunk c cs) | S.length c < 16 = Chunk (S.cons w c) cs
  366 cons' w cs                             = Chunk (S.singleton w) cs
  367 {-# INLINE cons' #-}
  368 
  369 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString'
  370 snoc :: ByteString -> Word8 -> ByteString
  371 snoc cs w = foldrChunks Chunk (singleton w) cs
  372 {-# INLINE snoc #-}
  373 
  374 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
  375 head :: ByteString -> Word8
  376 head Empty       = errorEmptyList "head"
  377 head (Chunk c _) = S.unsafeHead c
  378 {-# INLINE head #-}
  379 
  380 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
  381 -- if it is empty.
  382 uncons :: ByteString -> Maybe (Word8, ByteString)
  383 uncons Empty = Nothing
  384 uncons (Chunk c cs)
  385     = Just (S.unsafeHead c,
  386             if S.length c == 1 then cs else Chunk (S.unsafeTail c) cs)
  387 {-# INLINE uncons #-}
  388 
  389 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be
  390 -- non-empty.
  391 tail :: ByteString -> ByteString
  392 tail Empty          = errorEmptyList "tail"
  393 tail (Chunk c cs)
  394   | S.length c == 1 = cs
  395   | otherwise       = Chunk (S.unsafeTail c) cs
  396 {-# INLINE tail #-}
  397 
  398 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite
  399 -- and non-empty.
  400 last :: ByteString -> Word8
  401 last Empty          = errorEmptyList "last"
  402 last (Chunk c0 cs0) = go c0 cs0
  403   where go c Empty        = S.last c
  404         go _ (Chunk c cs) = go c cs
  405 -- XXX Don't inline this. Something breaks with 6.8.2 (haven't investigated yet)
  406 
  407 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
  408 init :: ByteString -> ByteString
  409 init Empty          = errorEmptyList "init"
  410 init (Chunk c0 cs0) = go c0 cs0
  411   where go c Empty | S.length c == 1 = Empty
  412                    | otherwise       = Chunk (S.init c) Empty
  413         go c (Chunk c' cs)           = Chunk c (go c' cs)
  414 
  415 -- | /O(n\/c)/ Append two ByteStrings
  416 append :: ByteString -> ByteString -> ByteString
  417 append xs ys = foldrChunks Chunk ys xs
  418 {-# INLINE append #-}
  419 
  420 -- ---------------------------------------------------------------------
  421 -- Transformations
  422 
  423 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
  424 -- element of @xs@.
  425 map :: (Word8 -> Word8) -> ByteString -> ByteString
  426 map f s = go s
  427     where
  428         go Empty        = Empty
  429         go (Chunk x xs) = Chunk y ys
  430             where
  431                 y  = S.map f x
  432                 ys = go xs
  433 {-# INLINE map #-}
  434 
  435 -- | /O(n)/ 'reverse' @xs@ returns the elements of @xs@ in reverse order.
  436 reverse :: ByteString -> ByteString
  437 reverse cs0 = rev Empty cs0
  438   where rev a Empty        = a
  439         rev a (Chunk c cs) = rev (Chunk (S.reverse c) a) cs
  440 {-# INLINE reverse #-}
  441 
  442 -- | The 'intersperse' function takes a 'Word8' and a 'ByteString' and
  443 -- \`intersperses\' that byte between the elements of the 'ByteString'.
  444 -- It is analogous to the intersperse function on Lists.
  445 intersperse :: Word8 -> ByteString -> ByteString
  446 intersperse _ Empty        = Empty
  447 intersperse w (Chunk c cs) = Chunk (S.intersperse w c)
  448                                    (foldrChunks (Chunk . intersperse') Empty cs)
  449   where intersperse' :: S.ByteString -> S.ByteString
  450         intersperse' (S.PS fp o l) =
  451           S.unsafeCreate (2*l) $ \p' -> withForeignPtr fp $ \p -> do
  452             poke p' w
  453             S.c_intersperse (p' `plusPtr` 1) (p `plusPtr` o) (fromIntegral l) w
  454 
  455 -- | The 'transpose' function transposes the rows and columns of its
  456 -- 'ByteString' argument.
  457 transpose :: [ByteString] -> [ByteString]
  458 transpose css = L.map (\ss -> Chunk (S.pack ss) Empty)
  459                       (L.transpose (L.map unpack css))
  460 --TODO: make this fast
  461 
  462 -- ---------------------------------------------------------------------
  463 -- Reducing 'ByteString's
  464 
  465 -- | 'foldl', applied to a binary operator, a starting value (typically
  466 -- the left-identity of the operator), and a ByteString, reduces the
  467 -- ByteString using the binary operator, from left to right.
  468 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
  469 foldl f z = go z
  470   where go a Empty        = a
  471         go a (Chunk c cs) = go (S.foldl f a c) cs
  472 {-# INLINE foldl #-}
  473 
  474 -- | 'foldl\'' is like 'foldl', but strict in the accumulator.
  475 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
  476 foldl' f z = go z
  477   where go a _ | a `seq` False = undefined
  478         go a Empty        = a
  479         go a (Chunk c cs) = go (S.foldl f a c) cs
  480 {-# INLINE foldl' #-}
  481 
  482 -- | 'foldr', applied to a binary operator, a starting value
  483 -- (typically the right-identity of the operator), and a ByteString,
  484 -- reduces the ByteString using the binary operator, from right to left.
  485 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
  486 foldr k z cs = foldrChunks (flip (S.foldr k)) z cs
  487 {-# INLINE foldr #-}
  488 
  489 -- | 'foldl1' is a variant of 'foldl' that has no starting value
  490 -- argument, and thus must be applied to non-empty 'ByteStrings'.
  491 -- This function is subject to array fusion.
  492 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  493 foldl1 _ Empty        = errorEmptyList "foldl1"
  494 foldl1 f (Chunk c cs) = foldl f (S.unsafeHead c) (Chunk (S.unsafeTail c) cs)
  495 
  496 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
  497 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  498 foldl1' _ Empty        = errorEmptyList "foldl1'"
  499 foldl1' f (Chunk c cs) = foldl' f (S.unsafeHead c) (Chunk (S.unsafeTail c) cs)
  500 
  501 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
  502 -- and thus must be applied to non-empty 'ByteString's
  503 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  504 foldr1 _ Empty          = errorEmptyList "foldr1"
  505 foldr1 f (Chunk c0 cs0) = go c0 cs0
  506   where go c Empty         = S.foldr1 f c
  507         go c (Chunk c' cs) = S.foldr  f (go c' cs) c
  508 
  509 -- ---------------------------------------------------------------------
  510 -- Special folds
  511 
  512 -- | /O(n)/ Concatenate a list of ByteStrings.
  513 concat :: [ByteString] -> ByteString
  514 concat css0 = to css0
  515   where
  516     go Empty        css = to css
  517     go (Chunk c cs) css = Chunk c (go cs css)
  518     to []               = Empty
  519     to (cs:css)         = go cs css
  520 
  521 -- | Map a function over a 'ByteString' and concatenate the results
  522 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
  523 concatMap _ Empty        = Empty
  524 concatMap f (Chunk c0 cs0) = to c0 cs0
  525   where
  526     go :: ByteString -> S.ByteString -> ByteString -> ByteString
  527     go Empty        c' cs' = to c' cs'
  528     go (Chunk c cs) c' cs' = Chunk c (go cs c' cs')
  529 
  530     to :: S.ByteString -> ByteString -> ByteString
  531     to c cs | S.null c  = case cs of
  532         Empty          -> Empty
  533         (Chunk c' cs') -> to c' cs'
  534             | otherwise = go (f (S.unsafeHead c)) (S.unsafeTail c) cs
  535 
  536 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
  537 -- any element of the 'ByteString' satisfies the predicate.
  538 any :: (Word8 -> Bool) -> ByteString -> Bool
  539 any f cs = foldrChunks (\c rest -> S.any f c || rest) False cs
  540 {-# INLINE any #-}
  541 -- todo fuse
  542 
  543 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
  544 -- if all elements of the 'ByteString' satisfy the predicate.
  545 all :: (Word8 -> Bool) -> ByteString -> Bool
  546 all f cs = foldrChunks (\c rest -> S.all f c && rest) True cs
  547 {-# INLINE all #-}
  548 -- todo fuse
  549 
  550 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
  551 maximum :: ByteString -> Word8
  552 maximum Empty        = errorEmptyList "maximum"
  553 maximum (Chunk c cs) = foldlChunks (\n c' -> n `max` S.maximum c')
  554                                    (S.maximum c) cs
  555 {-# INLINE maximum #-}
  556 
  557 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
  558 minimum :: ByteString -> Word8
  559 minimum Empty        = errorEmptyList "minimum"
  560 minimum (Chunk c cs) = foldlChunks (\n c' -> n `min` S.minimum c')
  561                                      (S.minimum c) cs
  562 {-# INLINE minimum #-}
  563 
  564 -- | The 'mapAccumL' function behaves like a combination of 'map' and
  565 -- 'foldl'; it applies a function to each element of a ByteString,
  566 -- passing an accumulating parameter from left to right, and returning a
  567 -- final value of this accumulator together with the new ByteString.
  568 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  569 mapAccumL f s0 cs0 = go s0 cs0
  570   where
  571     go s Empty        = (s, Empty)
  572     go s (Chunk c cs) = (s'', Chunk c' cs')
  573         where (s',  c')  = S.mapAccumL f s c
  574               (s'', cs') = go s' cs
  575 
  576 -- | The 'mapAccumR' function behaves like a combination of 'map' and
  577 -- 'foldr'; it applies a function to each element of a ByteString,
  578 -- passing an accumulating parameter from right to left, and returning a
  579 -- final value of this accumulator together with the new ByteString.
  580 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  581 mapAccumR f s0 cs0 = go s0 cs0
  582   where
  583     go s Empty        = (s, Empty)
  584     go s (Chunk c cs) = (s'', Chunk c' cs')
  585         where (s'', c') = S.mapAccumR f s' c
  586               (s', cs') = go s cs
  587 
  588 -- ---------------------------------------------------------------------
  589 -- Building ByteStrings
  590 
  591 -- | 'scanl' is similar to 'foldl', but returns a list of successive
  592 -- reduced values from the left. This function will fuse.
  593 --
  594 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
  595 --
  596 -- Note that
  597 --
  598 -- > last (scanl f z xs) == foldl f z xs.
  599 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  600 scanl f z = snd . foldl k (z,singleton z)
  601  where
  602     k (c,acc) a = let n = f c a in (n, acc `snoc` n)
  603 {-# INLINE scanl #-}
  604 
  605 -- ---------------------------------------------------------------------
  606 -- Unfolds and replicates
  607 
  608 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications
  609 -- of @f@ to @x@:
  610 --
  611 -- > iterate f x == [x, f x, f (f x), ...]
  612 --
  613 iterate :: (Word8 -> Word8) -> Word8 -> ByteString
  614 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x'))
  615 
  616 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every
  617 -- element.
  618 --
  619 repeat :: Word8 -> ByteString
  620 repeat w = cs where cs = Chunk (S.replicate smallChunkSize w) cs
  621 
  622 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@
  623 -- the value of every element.
  624 --
  625 replicate :: Int64 -> Word8 -> ByteString
  626 replicate n w
  627     | n <= 0             = Empty
  628     | n < fromIntegral smallChunkSize = Chunk (S.replicate (fromIntegral n) w) Empty
  629     | r == 0             = cs -- preserve invariant
  630     | otherwise          = Chunk (S.unsafeTake (fromIntegral r) c) cs
  631  where
  632     c      = S.replicate smallChunkSize w
  633     cs     = nChunks q
  634     (q, r) = quotRem n (fromIntegral smallChunkSize)
  635     nChunks 0 = Empty
  636     nChunks m = Chunk c (nChunks (m-1))
  637 
  638 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently,
  639 -- the infinite repetition of the original ByteString.
  640 --
  641 cycle :: ByteString -> ByteString
  642 cycle Empty = errorEmptyList "cycle"
  643 cycle cs    = cs' where cs' = foldrChunks Chunk cs' cs
  644 
  645 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'.
  646 -- 'unfoldr' builds a ByteString from a seed value.  The function takes
  647 -- the element and returns 'Nothing' if it is done producing the
  648 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a
  649 -- prepending to the ByteString and @b@ is used as the next element in a
  650 -- recursive call.
  651 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
  652 unfoldr f s0 = unfoldChunk 32 s0
  653   where unfoldChunk n s =
  654           case S.unfoldrN n f s of
  655             (c, Nothing)
  656               | S.null c  -> Empty
  657               | otherwise -> Chunk c Empty
  658             (c, Just s')  -> Chunk c (unfoldChunk (n*2) s')
  659 
  660 -- ---------------------------------------------------------------------
  661 -- Substrings
  662 
  663 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
  664 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
  665 take :: Int64 -> ByteString -> ByteString
  666 take i _ | i <= 0 = Empty
  667 take i cs0         = take' i cs0
  668   where take' 0 _            = Empty
  669         take' _ Empty        = Empty
  670         take' n (Chunk c cs) =
  671           if n < fromIntegral (S.length c)
  672             then Chunk (S.take (fromIntegral n) c) Empty
  673             else Chunk c (take' (n - fromIntegral (S.length c)) cs)
  674 
  675 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
  676 -- elements, or @[]@ if @n > 'length' xs@.
  677 drop  :: Int64 -> ByteString -> ByteString
  678 drop i p | i <= 0 = p
  679 drop i cs0 = drop' i cs0
  680   where drop' 0 cs           = cs
  681         drop' _ Empty        = Empty
  682         drop' n (Chunk c cs) =
  683           if n < fromIntegral (S.length c)
  684             then Chunk (S.drop (fromIntegral n) c) cs
  685             else drop' (n - fromIntegral (S.length c)) cs
  686 
  687 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
  688 splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
  689 splitAt i cs0 | i <= 0 = (Empty, cs0)
  690 splitAt i cs0 = splitAt' i cs0
  691   where splitAt' 0 cs           = (Empty, cs)
  692         splitAt' _ Empty        = (Empty, Empty)
  693         splitAt' n (Chunk c cs) =
  694           if n < fromIntegral (S.length c)
  695             then (Chunk (S.take (fromIntegral n) c) Empty 
  696                  ,Chunk (S.drop (fromIntegral n) c) cs)
  697             else let (cs', cs'') = splitAt' (n - fromIntegral (S.length c)) cs
  698                    in (Chunk c cs', cs'')
  699 
  700 
  701 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
  702 -- returns the longest prefix (possibly empty) of @xs@ of elements that
  703 -- satisfy @p@.
  704 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  705 takeWhile f cs0 = takeWhile' cs0
  706   where takeWhile' Empty        = Empty
  707         takeWhile' (Chunk c cs) =
  708           case findIndexOrEnd (not . f) c of
  709             0                  -> Empty
  710             n | n < S.length c -> Chunk (S.take n c) Empty
  711               | otherwise      -> Chunk c (takeWhile' cs)
  712 
  713 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
  714 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  715 dropWhile f cs0 = dropWhile' cs0
  716   where dropWhile' Empty        = Empty
  717         dropWhile' (Chunk c cs) =
  718           case findIndexOrEnd (not . f) c of
  719             n | n < S.length c -> Chunk (S.drop n c) cs
  720               | otherwise      -> dropWhile' cs
  721 
  722 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
  723 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  724 break f cs0 = break' cs0
  725   where break' Empty        = (Empty, Empty)
  726         break' (Chunk c cs) =
  727           case findIndexOrEnd f c of
  728             0                  -> (Empty, Chunk c cs)
  729             n | n < S.length c -> (Chunk (S.take n c) Empty
  730                                   ,Chunk (S.drop n c) cs)
  731               | otherwise      -> let (cs', cs'') = break' cs
  732                                    in (Chunk c cs', cs'')
  733 
  734 --
  735 -- TODO
  736 --
  737 -- Add rules
  738 --
  739 
  740 {-
  741 -- | 'breakByte' breaks its ByteString argument at the first occurence
  742 -- of the specified byte. It is more efficient than 'break' as it is
  743 -- implemented with @memchr(3)@. I.e.
  744 -- 
  745 -- > break (=='c') "abcd" == breakByte 'c' "abcd"
  746 --
  747 breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
  748 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b)
  749   where breakByte' []     = ([], [])
  750         breakByte' (x:xs) =
  751           case P.elemIndex c x of
  752             Just 0  -> ([], x : xs)
  753             Just n  -> (P.take n x : [], P.drop n x : xs)
  754             Nothing -> let (xs', xs'') = breakByte' xs
  755                         in (x : xs', xs'')
  756 
  757 -- | 'spanByte' breaks its ByteString argument at the first
  758 -- occurence of a byte other than its argument. It is more efficient
  759 -- than 'span (==)'
  760 --
  761 -- > span  (=='c') "abcd" == spanByte 'c' "abcd"
  762 --
  763 spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
  764 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
  765   where spanByte' []     = ([], [])
  766         spanByte' (x:xs) =
  767           case P.spanByte c x of
  768             (x', x'') | P.null x'  -> ([], x : xs)
  769                       | P.null x'' -> let (xs', xs'') = spanByte' xs
  770                                        in (x : xs', xs'')
  771                       | otherwise  -> (x' : [], x'' : xs)
  772 -}
  773 
  774 -- | 'span' @p xs@ breaks the ByteString into two segments. It is
  775 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
  776 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  777 span p = break (not . p)
  778 
  779 -- | /O(n)/ Splits a 'ByteString' into components delimited by
  780 -- separators, where the predicate returns True for a separator element.
  781 -- The resulting components do not contain the separators.  Two adjacent
  782 -- separators result in an empty component in the output.  eg.
  783 --
  784 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
  785 -- > splitWith (=='a') []        == []
  786 --
  787 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
  788 splitWith _ Empty          = []
  789 splitWith p (Chunk c0 cs0) = comb [] (S.splitWith p c0) cs0
  790 
  791   where comb :: [S.ByteString] -> [S.ByteString] -> ByteString -> [ByteString]
  792         comb acc (s:[]) Empty        = revChunks (s:acc) : []
  793         comb acc (s:[]) (Chunk c cs) = comb (s:acc) (S.splitWith p c) cs
  794         comb acc (s:ss) cs           = revChunks (s:acc) : comb [] ss cs
  795 
  796 {-# INLINE splitWith #-}
  797 
  798 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
  799 -- argument, consuming the delimiter. I.e.
  800 --
  801 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
  802 -- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
  803 -- > split 'x'  "x"          == ["",""]
  804 -- 
  805 -- and
  806 --
  807 -- > intercalate [c] . split c == id
  808 -- > split == splitWith . (==)
  809 -- 
  810 -- As for all splitting functions in this library, this function does
  811 -- not copy the substrings, it just constructs new 'ByteStrings' that
  812 -- are slices of the original.
  813 --
  814 split :: Word8 -> ByteString -> [ByteString]
  815 split _ Empty     = []
  816 split w (Chunk c0 cs0) = comb [] (S.split w c0) cs0
  817 
  818   where comb :: [S.ByteString] -> [S.ByteString] -> ByteString -> [ByteString]
  819         comb acc (s:[]) Empty        = revChunks (s:acc) : []
  820         comb acc (s:[]) (Chunk c cs) = comb (s:acc) (S.split w c) cs
  821         comb acc (s:ss) cs           = revChunks (s:acc) : comb [] ss cs
  822 {-# INLINE split #-}
  823 
  824 {-
  825 -- | Like 'splitWith', except that sequences of adjacent separators are
  826 -- treated as a single separator. eg.
  827 -- 
  828 -- > tokens (=='a') "aabbaca" == ["bb","c"]
  829 --
  830 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
  831 tokens f = L.filter (not.null) . splitWith f
  832 -}
  833 
  834 -- | The 'group' function takes a ByteString and returns a list of
  835 -- ByteStrings such that the concatenation of the result is equal to the
  836 -- argument.  Moreover, each sublist in the result contains only equal
  837 -- elements.  For example,
  838 --
  839 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
  840 --
  841 -- It is a special case of 'groupBy', which allows the programmer to
  842 -- supply their own equality test.
  843 group :: ByteString -> [ByteString]
  844 group Empty          = []
  845 group (Chunk c0 cs0) = group' [] (S.group c0) cs0
  846   where 
  847     group' :: [S.ByteString] -> [S.ByteString] -> ByteString -> [ByteString]
  848     group' acc@(s':_) ss@(s:_) cs
  849       | S.unsafeHead s'
  850      /= S.unsafeHead s             = revNonEmptyChunks    acc  : group' [] ss cs
  851     group' acc (s:[]) Empty        = revNonEmptyChunks (s:acc) : []
  852     group' acc (s:[]) (Chunk c cs) = group' (s:acc) (S.group c) cs
  853     group' acc (s:ss) cs           = revNonEmptyChunks (s:acc) : group' [] ss cs
  854 
  855 {-
  856 TODO: check if something like this might be faster
  857 
  858 group :: ByteString -> [ByteString]
  859 group xs
  860     | null xs   = []
  861     | otherwise = ys : group zs
  862     where
  863         (ys, zs) = spanByte (unsafeHead xs) xs
  864 -}
  865 
  866 -- | The 'groupBy' function is the non-overloaded version of 'group'.
  867 --
  868 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  869 groupBy _ Empty          = []
  870 groupBy k (Chunk c0 cs0) = groupBy' [] 0 (S.groupBy k c0) cs0
  871   where
  872     groupBy' :: [S.ByteString] -> Word8 -> [S.ByteString] -> ByteString -> [ByteString]
  873     groupBy' acc@(_:_) c ss@(s:_) cs
  874       | not (c `k` S.unsafeHead s)     = revNonEmptyChunks acc : groupBy' [] 0 ss cs
  875     groupBy' acc _ (s:[]) Empty        = revNonEmptyChunks (s : acc) : []
  876     groupBy' acc w (s:[]) (Chunk c cs) = groupBy' (s:acc) w' (S.groupBy k c) cs
  877                                            where w' | L.null acc = S.unsafeHead s
  878                                                     | otherwise  = w
  879     groupBy' acc _ (s:ss) cs           = revNonEmptyChunks (s : acc) : groupBy' [] 0 ss cs
  880 
  881 {-
  882 TODO: check if something like this might be faster
  883 
  884 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  885 groupBy k xs
  886     | null xs   = []
  887     | otherwise = take n xs : groupBy k (drop n xs)
  888     where
  889         n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs)
  890 -}
  891 
  892 -- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of
  893 -- 'ByteString's and concatenates the list after interspersing the first
  894 -- argument between each element of the list.
  895 intercalate :: ByteString -> [ByteString] -> ByteString
  896 intercalate s = concat . (L.intersperse s)
  897 
  898 -- ---------------------------------------------------------------------
  899 -- Indexing ByteStrings
  900 
  901 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0.
  902 index :: ByteString -> Int64 -> Word8
  903 index _  i | i < 0  = moduleError "index" ("negative index: " ++ show i)
  904 index cs0 i         = index' cs0 i
  905   where index' Empty     n = moduleError "index" ("index too large: " ++ show n)
  906         index' (Chunk c cs) n
  907           | n >= fromIntegral (S.length c) = 
  908               index' cs (n - fromIntegral (S.length c))
  909           | otherwise       = S.unsafeIndex c (fromIntegral n)
  910 
  911 -- | /O(n)/ The 'elemIndex' function returns the index of the first
  912 -- element in the given 'ByteString' which is equal to the query
  913 -- element, or 'Nothing' if there is no such element. 
  914 -- This implementation uses memchr(3).
  915 elemIndex :: Word8 -> ByteString -> Maybe Int64
  916 elemIndex w cs0 = elemIndex' 0 cs0
  917   where elemIndex' _ Empty        = Nothing
  918         elemIndex' n (Chunk c cs) =
  919           case S.elemIndex w c of
  920             Nothing -> elemIndex' (n + fromIntegral (S.length c)) cs
  921             Just i  -> Just (n + fromIntegral i)
  922 
  923 {-
  924 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the
  925 -- element in the given 'ByteString' which is equal to the query
  926 -- element, or 'Nothing' if there is no such element. The following
  927 -- holds:
  928 --
  929 -- > elemIndexEnd c xs == 
  930 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
  931 --
  932 elemIndexEnd :: Word8 -> ByteString -> Maybe Int
  933 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
  934     go (p `plusPtr` s) (l-1)
  935   where
  936     STRICT2(go)
  937     go p i | i < 0     = return Nothing
  938            | otherwise = do ch' <- peekByteOff p i
  939                             if ch == ch'
  940                                 then return $ Just i
  941                                 else go p (i-1)
  942 -}
  943 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
  944 -- the indices of all elements equal to the query element, in ascending order.
  945 -- This implementation uses memchr(3).
  946 elemIndices :: Word8 -> ByteString -> [Int64]
  947 elemIndices w cs0 = elemIndices' 0 cs0
  948   where elemIndices' _ Empty        = []
  949         elemIndices' n (Chunk c cs) = L.map ((+n).fromIntegral) (S.elemIndices w c)
  950                              ++ elemIndices' (n + fromIntegral (S.length c)) cs
  951 
  952 -- | count returns the number of times its argument appears in the ByteString
  953 --
  954 -- > count = length . elemIndices
  955 --
  956 -- But more efficiently than using length on the intermediate list.
  957 count :: Word8 -> ByteString -> Int64
  958 count w cs = foldlChunks (\n c -> n + fromIntegral (S.count w c)) 0 cs
  959 
  960 -- | The 'findIndex' function takes a predicate and a 'ByteString' and
  961 -- returns the index of the first element in the ByteString
  962 -- satisfying the predicate.
  963 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64
  964 findIndex k cs0 = findIndex' 0 cs0
  965   where findIndex' _ Empty        = Nothing
  966         findIndex' n (Chunk c cs) =
  967           case S.findIndex k c of
  968             Nothing -> findIndex' (n + fromIntegral (S.length c)) cs
  969             Just i  -> Just (n + fromIntegral i)
  970 {-# INLINE findIndex #-}
  971 
  972 -- | /O(n)/ The 'find' function takes a predicate and a ByteString,
  973 -- and returns the first element in matching the predicate, or 'Nothing'
  974 -- if there is no such element.
  975 --
  976 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
  977 --
  978 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8
  979 find f cs0 = find' cs0
  980   where find' Empty        = Nothing
  981         find' (Chunk c cs) = case S.find f c of
  982             Nothing -> find' cs
  983             Just w  -> Just w
  984 {-# INLINE find #-}
  985 
  986 -- | The 'findIndices' function extends 'findIndex', by returning the
  987 -- indices of all elements satisfying the predicate, in ascending order.
  988 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64]
  989 findIndices k cs0 = findIndices' 0 cs0
  990   where findIndices' _ Empty        = []
  991         findIndices' n (Chunk c cs) = L.map ((+n).fromIntegral) (S.findIndices k c)
  992                              ++ findIndices' (n + fromIntegral (S.length c)) cs
  993 
  994 -- ---------------------------------------------------------------------
  995 -- Searching ByteStrings
  996 
  997 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate.
  998 elem :: Word8 -> ByteString -> Bool
  999 elem w cs = case elemIndex w cs of Nothing -> False ; _ -> True
 1000 
 1001 -- | /O(n)/ 'notElem' is the inverse of 'elem'
 1002 notElem :: Word8 -> ByteString -> Bool
 1003 notElem w cs = not (elem w cs)
 1004 
 1005 -- | /O(n)/ 'filter', applied to a predicate and a ByteString,
 1006 -- returns a ByteString containing those characters that satisfy the
 1007 -- predicate.
 1008 filter :: (Word8 -> Bool) -> ByteString -> ByteString
 1009 filter p s = go s
 1010     where
 1011         go Empty        = Empty
 1012         go (Chunk x xs) = chunk (S.filter p x) (go xs)
 1013 {-# INLINE filter #-}
 1014 
 1015 {-
 1016 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter .
 1017 -- (==)/, for the common case of filtering a single byte. It is more
 1018 -- efficient to use /filterByte/ in this case.
 1019 --
 1020 -- > filterByte == filter . (==)
 1021 --
 1022 -- filterByte is around 10x faster, and uses much less space, than its
 1023 -- filter equivalent
 1024 filterByte :: Word8 -> ByteString -> ByteString
 1025 filterByte w ps = replicate (count w ps) w
 1026 {-# INLINE filterByte #-}
 1027 
 1028 {-# RULES
 1029 "ByteString specialise filter (== x)" forall x.
 1030   filter ((==) x) = filterByte x
 1031 
 1032 "ByteString specialise filter (== x)" forall x.
 1033  filter (== x) = filterByte x
 1034   #-}
 1035 -}
 1036 
 1037 {-
 1038 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common
 1039 -- case of filtering a single byte out of a list. It is more efficient
 1040 -- to use /filterNotByte/ in this case.
 1041 --
 1042 -- > filterNotByte == filter . (/=)
 1043 --
 1044 -- filterNotByte is around 2x faster than its filter equivalent.
 1045 filterNotByte :: Word8 -> ByteString -> ByteString
 1046 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs)
 1047 -}
 1048 
 1049 -- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns
 1050 -- the pair of ByteStrings with elements which do and do not satisfy the
 1051 -- predicate, respectively; i.e.,
 1052 --
 1053 -- > partition p bs == (filter p xs, filter (not . p) xs)
 1054 --
 1055 partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 1056 partition f p = (filter f p, filter (not . f) p)
 1057 --TODO: use a better implementation
 1058 
 1059 -- ---------------------------------------------------------------------
 1060 -- Searching for substrings
 1061 
 1062 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True'
 1063 -- iff the first is a prefix of the second.
 1064 isPrefixOf :: ByteString -> ByteString -> Bool
 1065 isPrefixOf Empty _  = True
 1066 isPrefixOf _ Empty  = False
 1067 isPrefixOf (Chunk x xs) (Chunk y ys)
 1068     | S.length x == S.length y = x == y  && isPrefixOf xs ys
 1069     | S.length x <  S.length y = x == yh && isPrefixOf xs (Chunk yt ys)
 1070     | otherwise                = xh == y && isPrefixOf (Chunk xt xs) ys
 1071   where (xh,xt) = S.splitAt (S.length y) x
 1072         (yh,yt) = S.splitAt (S.length x) y
 1073 
 1074 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True'
 1075 -- iff the first is a suffix of the second.
 1076 -- 
 1077 -- The following holds:
 1078 --
 1079 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
 1080 --
 1081 isSuffixOf :: ByteString -> ByteString -> Bool
 1082 isSuffixOf x y = reverse x `isPrefixOf` reverse y
 1083 --TODO: a better implementation
 1084 
 1085 -- ---------------------------------------------------------------------
 1086 -- Zipping
 1087 
 1088 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of
 1089 -- corresponding pairs of bytes. If one input ByteString is short,
 1090 -- excess elements of the longer ByteString are discarded. This is
 1091 -- equivalent to a pair of 'unpack' operations.
 1092 zip :: ByteString -> ByteString -> [(Word8,Word8)]
 1093 zip = zipWith (,)
 1094 
 1095 -- | 'zipWith' generalises 'zip' by zipping with the function given as
 1096 -- the first argument, instead of a tupling function.  For example,
 1097 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of
 1098 -- corresponding sums.
 1099 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a]
 1100 zipWith _ Empty     _  = []
 1101 zipWith _ _      Empty = []
 1102 zipWith f (Chunk a as) (Chunk b bs) = go a as b bs
 1103   where
 1104     go x xs y ys = f (S.unsafeHead x) (S.unsafeHead y)
 1105                  : to (S.unsafeTail x) xs (S.unsafeTail y) ys
 1106 
 1107     to x Empty         _ _             | S.null x       = []
 1108     to _ _             y Empty         | S.null y       = []
 1109     to x xs            y ys            | not (S.null x)
 1110                                       && not (S.null y) = go x  xs y  ys
 1111     to x xs            _ (Chunk y' ys) | not (S.null x) = go x  xs y' ys
 1112     to _ (Chunk x' xs) y ys            | not (S.null y) = go x' xs y  ys
 1113     to _ (Chunk x' xs) _ (Chunk y' ys)                  = go x' xs y' ys
 1114 
 1115 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
 1116 -- ByteStrings. Note that this performs two 'pack' operations.
 1117 unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
 1118 unzip ls = (pack (L.map fst ls), pack (L.map snd ls))
 1119 {-# INLINE unzip #-}
 1120 
 1121 -- ---------------------------------------------------------------------
 1122 -- Special lists
 1123 
 1124 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
 1125 inits :: ByteString -> [ByteString]
 1126 inits = (Empty :) . inits'
 1127   where inits' Empty        = []
 1128         inits' (Chunk c cs) = L.map (\c' -> Chunk c' Empty) (L.tail (S.inits c))
 1129                            ++ L.map (Chunk c) (inits' cs)
 1130 
 1131 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
 1132 tails :: ByteString -> [ByteString]
 1133 tails Empty         = Empty : []
 1134 tails cs@(Chunk c cs')
 1135   | S.length c == 1 = cs : tails cs'
 1136   | otherwise       = cs : tails (Chunk (S.unsafeTail c) cs')
 1137 
 1138 -- ---------------------------------------------------------------------
 1139 -- Low level constructors
 1140 
 1141 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage.
 1142 --   This is mainly useful to allow the rest of the data pointed
 1143 --   to by the 'ByteString' to be garbage collected, for example
 1144 --   if a large string has been read in, and only a small part of it
 1145 --   is needed in the rest of the program.
 1146 copy :: ByteString -> ByteString
 1147 copy cs = foldrChunks (Chunk . S.copy) Empty cs
 1148 --TODO, we could coalese small blocks here
 1149 --FIXME: probably not strict enough, if we're doing this to avoid retaining
 1150 -- the parent blocks then we'd better copy strictly.
 1151 
 1152 -- ---------------------------------------------------------------------
 1153 
 1154 -- TODO defrag func that concatenates block together that are below a threshold
 1155 -- defrag :: ByteString -> ByteString
 1156 
 1157 -- ---------------------------------------------------------------------
 1158 -- Lazy ByteString IO
 1159 --
 1160 -- Rule for when to close: is it expected to read the whole file?
 1161 -- If so, close when done. 
 1162 --
 1163 
 1164 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
 1165 -- are read on demand, in at most @k@-sized chunks. It does not block
 1166 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are
 1167 -- available then they will be returned immediately as a smaller chunk.
 1168 --
 1169 -- The handle is closed on EOF.
 1170 --
 1171 hGetContentsN :: Int -> Handle -> IO ByteString
 1172 hGetContentsN k h = lazyRead -- TODO close on exceptions
 1173   where
 1174     lazyRead = unsafeInterleaveIO loop
 1175 
 1176     loop = do
 1177         c <- S.hGetNonBlocking h k
 1178         --TODO: I think this should distinguish EOF from no data available
 1179         -- the underlying POSIX call makes this distincion, returning either
 1180         -- 0 or EAGAIN
 1181         if S.null c
 1182           then do eof <- hIsEOF h
 1183                   if eof then hClose h >> return Empty
 1184                          else hWaitForInput h (-1)
 1185                            >> loop
 1186           else do cs <- lazyRead
 1187                   return (Chunk c cs)
 1188 
 1189 -- | Read @n@ bytes into a 'ByteString', directly from the
 1190 -- specified 'Handle', in chunks of size @k@.
 1191 --
 1192 hGetN :: Int -> Handle -> Int -> IO ByteString
 1193 hGetN _ _ 0 = return empty
 1194 hGetN k h n = readChunks n
 1195   where
 1196     STRICT1(readChunks)
 1197     readChunks i = do
 1198         c <- S.hGet h (min k i)
 1199         case S.length c of
 1200             0 -> return Empty
 1201             m -> do cs <- readChunks (i - m)
 1202                     return (Chunk c cs)
 1203 
 1204 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block
 1205 -- waiting for data to become available, instead it returns only whatever data
 1206 -- is available. Chunks are read on demand, in @k@-sized chunks.
 1207 --
 1208 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString
 1209 #if defined(__GLASGOW_HASKELL__)
 1210 hGetNonBlockingN _ _ 0 = return Empty
 1211 hGetNonBlockingN k h n = readChunks n
 1212   where
 1213     STRICT1(readChunks)
 1214     readChunks i = do
 1215         c <- S.hGetNonBlocking h (min k i)
 1216         case S.length c of
 1217             0 -> return Empty
 1218             m -> do cs <- readChunks (i - m)
 1219                     return (Chunk c cs)
 1220 #else
 1221 hGetNonBlockingN = hGetN
 1222 #endif
 1223 
 1224 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks
 1225 -- are read on demand, using the default chunk size.
 1226 --
 1227 -- Once EOF is encountered, the Handle is closed.
 1228 --
 1229 hGetContents :: Handle -> IO ByteString
 1230 hGetContents = hGetContentsN defaultChunkSize
 1231 
 1232 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'.
 1233 --
 1234 hGet :: Handle -> Int -> IO ByteString
 1235 hGet = hGetN defaultChunkSize
 1236 
 1237 -- | hGetNonBlocking is similar to 'hGet', except that it will never block
 1238 -- waiting for data to become available, instead it returns only whatever data
 1239 -- is available.
 1240 #if defined(__GLASGOW_HASKELL__)
 1241 hGetNonBlocking :: Handle -> Int -> IO ByteString
 1242 hGetNonBlocking = hGetNonBlockingN defaultChunkSize
 1243 #else
 1244 hGetNonBlocking = hGet
 1245 #endif
 1246 
 1247 -- | Read an entire file /lazily/ into a 'ByteString'.
 1248 -- The Handle will be held open until EOF is encountered.
 1249 --
 1250 readFile :: FilePath -> IO ByteString
 1251 readFile f = openBinaryFile f ReadMode >>= hGetContents
 1252 
 1253 -- | Write a 'ByteString' to a file.
 1254 --
 1255 writeFile :: FilePath -> ByteString -> IO ()
 1256 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose
 1257     (\hdl -> hPut hdl txt)
 1258 
 1259 -- | Append a 'ByteString' to a file.
 1260 --
 1261 appendFile :: FilePath -> ByteString -> IO ()
 1262 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose
 1263     (\hdl -> hPut hdl txt)
 1264 
 1265 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/
 1266 --
 1267 getContents :: IO ByteString
 1268 getContents = hGetContents stdin
 1269 
 1270 -- | Outputs a 'ByteString' to the specified 'Handle'.
 1271 --
 1272 hPut :: Handle -> ByteString -> IO ()
 1273 hPut h cs = foldrChunks (\c rest -> S.hPut h c >> rest) (return ()) cs
 1274 
 1275 -- | A synonym for @hPut@, for compatibility
 1276 --
 1277 hPutStr :: Handle -> ByteString -> IO ()
 1278 hPutStr = hPut
 1279 
 1280 -- | Write a ByteString to stdout
 1281 putStr :: ByteString -> IO ()
 1282 putStr = hPut stdout
 1283 
 1284 -- | Write a ByteString to stdout, appending a newline byte
 1285 --
 1286 putStrLn :: ByteString -> IO ()
 1287 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a)
 1288 
 1289 -- | The interact function takes a function of type @ByteString -> ByteString@
 1290 -- as its argument. The entire input from the standard input device is passed
 1291 -- to this function as its argument, and the resulting string is output on the
 1292 -- standard output device.
 1293 --
 1294 interact :: (ByteString -> ByteString) -> IO ()
 1295 interact transformer = putStr . transformer =<< getContents
 1296 
 1297 -- ---------------------------------------------------------------------
 1298 -- Internal utilities
 1299 
 1300 -- Common up near identical calls to `error' to reduce the number
 1301 -- constant strings created when compiled:
 1302 errorEmptyList :: String -> a
 1303 errorEmptyList fun = moduleError fun "empty ByteString"
 1304 
 1305 moduleError :: String -> String -> a
 1306 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg)
 1307 
 1308 
 1309 -- reverse a list of non-empty chunks into a lazy ByteString
 1310 revNonEmptyChunks :: [S.ByteString] -> ByteString
 1311 revNonEmptyChunks cs = L.foldl' (flip Chunk) Empty cs
 1312 
 1313 -- reverse a list of possibly-empty chunks into a lazy ByteString
 1314 revChunks :: [S.ByteString] -> ByteString
 1315 revChunks cs = L.foldl' (flip chunk) Empty cs
 1316 
 1317 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
 1318 -- of the string if no element is found, rather than Nothing.
 1319 findIndexOrEnd :: (Word8 -> Bool) -> S.ByteString -> Int
 1320 findIndexOrEnd k (S.PS x s l) = S.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0
 1321   where
 1322     STRICT2(go)                  
 1323     go ptr n | n >= l    = return l
 1324              | otherwise = do w <- peek ptr
 1325                               if k w
 1326                                 then return n
 1327                                 else go (ptr `plusPtr` 1) (n+1)
 1328 {-# INLINE findIndexOrEnd #-}