1 {-# LANGUAGE CPP #-} 2 {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} 3 4 -- #prune 5 6 -- | 7 -- Module : Data.ByteString.Lazy 8 -- Copyright : (c) Don Stewart 2006 9 -- (c) Duncan Coutts 2006 10 -- License : BSD-style 11 -- 12 -- Maintainer : dons@galois.com 13 -- Stability : experimental 14 -- Portability : portable 15 -- 16 -- A time and space-efficient implementation of lazy byte vectors 17 -- using lists of packed 'Word8' arrays, suitable for high performance 18 -- use, both in terms of large data quantities, or high speed 19 -- requirements. Byte vectors are encoded as lazy lists of strict 'Word8' 20 -- arrays of bytes. They provide a means to manipulate large byte vectors 21 -- without requiring the entire vector be resident in memory. 22 -- 23 -- Some operations, such as concat, append, reverse and cons, have 24 -- better complexity than their "Data.ByteString" equivalents, due to 25 -- optimisations resulting from the list spine structure. And for other 26 -- operations lazy ByteStrings are usually within a few percent of 27 -- strict ones, but with better heap usage. For data larger than the 28 -- available memory, or if you have tight memory constraints, this 29 -- module will be the only option. The default chunk size is 64k, which 30 -- should be good in most circumstances. For people with large L2 31 -- caches, you may want to increase this to fit your cache. 32 -- 33 -- This module is intended to be imported @qualified@, to avoid name 34 -- clashes with "Prelude" functions. eg. 35 -- 36 -- > import qualified Data.ByteString.Lazy as B 37 -- 38 -- Original GHC implementation by Bryan O\'Sullivan. 39 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow. 40 -- Rewritten to support slices and use 'Foreign.ForeignPtr.ForeignPtr' 41 -- by David Roundy. 42 -- Polished and extended by Don Stewart. 43 -- Lazy variant by Duncan Coutts and Don Stewart. 44 -- 45 46 module Data.ByteString.Lazy ( 47 48 -- * The @ByteString@ type 49 ByteString, -- instances: Eq, Ord, Show, Read, Data, Typeable 50 51 -- * Introducing and eliminating 'ByteString's 52 empty, -- :: ByteString 53 singleton, -- :: Word8 -> ByteString 54 pack, -- :: [Word8] -> ByteString 55 unpack, -- :: ByteString -> [Word8] 56 fromChunks, -- :: [Strict.ByteString] -> ByteString 57 toChunks, -- :: ByteString -> [Strict.ByteString] 58 59 -- * Basic interface 60 cons, -- :: Word8 -> ByteString -> ByteString 61 cons', -- :: Word8 -> ByteString -> ByteString 62 snoc, -- :: ByteString -> Word8 -> ByteString 63 append, -- :: ByteString -> ByteString -> ByteString 64 head, -- :: ByteString -> Word8 65 uncons, -- :: ByteString -> Maybe (Word8, ByteString) 66 last, -- :: ByteString -> Word8 67 tail, -- :: ByteString -> ByteString 68 init, -- :: ByteString -> ByteString 69 null, -- :: ByteString -> Bool 70 length, -- :: ByteString -> Int64 71 72 -- * Transforming ByteStrings 73 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString 74 reverse, -- :: ByteString -> ByteString 75 intersperse, -- :: Word8 -> ByteString -> ByteString 76 intercalate, -- :: ByteString -> [ByteString] -> ByteString 77 transpose, -- :: [ByteString] -> [ByteString] 78 79 -- * Reducing 'ByteString's (folds) 80 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a 81 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a 82 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 83 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 84 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a 85 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 86 87 -- ** Special folds 88 concat, -- :: [ByteString] -> ByteString 89 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString 90 any, -- :: (Word8 -> Bool) -> ByteString -> Bool 91 all, -- :: (Word8 -> Bool) -> ByteString -> Bool 92 maximum, -- :: ByteString -> Word8 93 minimum, -- :: ByteString -> Word8 94 95 -- * Building ByteStrings 96 -- ** Scans 97 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 98 -- scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 99 -- scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 100 -- scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 101 102 -- ** Accumulating maps 103 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 104 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 105 106 -- ** Infinite ByteStrings 107 repeat, -- :: Word8 -> ByteString 108 replicate, -- :: Int64 -> Word8 -> ByteString 109 cycle, -- :: ByteString -> ByteString 110 iterate, -- :: (Word8 -> Word8) -> Word8 -> ByteString 111 112 -- ** Unfolding ByteStrings 113 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString 114 115 -- * Substrings 116 117 -- ** Breaking strings 118 take, -- :: Int64 -> ByteString -> ByteString 119 drop, -- :: Int64 -> ByteString -> ByteString 120 splitAt, -- :: Int64 -> ByteString -> (ByteString, ByteString) 121 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString 122 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString 123 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 124 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 125 group, -- :: ByteString -> [ByteString] 126 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 127 inits, -- :: ByteString -> [ByteString] 128 tails, -- :: ByteString -> [ByteString] 129 130 -- ** Breaking into many substrings 131 split, -- :: Word8 -> ByteString -> [ByteString] 132 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] 133 134 -- * Predicates 135 isPrefixOf, -- :: ByteString -> ByteString -> Bool 136 isSuffixOf, -- :: ByteString -> ByteString -> Bool 137 -- isInfixOf, -- :: ByteString -> ByteString -> Bool 138 139 -- ** Search for arbitrary substrings 140 -- isSubstringOf, -- :: ByteString -> ByteString -> Bool 141 -- findSubstring, -- :: ByteString -> ByteString -> Maybe Int 142 -- findSubstrings, -- :: ByteString -> ByteString -> [Int] 143 144 -- * Searching ByteStrings 145 146 -- ** Searching by equality 147 elem, -- :: Word8 -> ByteString -> Bool 148 notElem, -- :: Word8 -> ByteString -> Bool 149 150 -- ** Searching with a predicate 151 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 152 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString 153 partition, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 154 155 -- * Indexing ByteStrings 156 index, -- :: ByteString -> Int64 -> Word8 157 elemIndex, -- :: Word8 -> ByteString -> Maybe Int64 158 elemIndices, -- :: Word8 -> ByteString -> [Int64] 159 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int64 160 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int64] 161 count, -- :: Word8 -> ByteString -> Int64 162 163 -- * Zipping and unzipping ByteStrings 164 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] 165 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] 166 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) 167 168 -- * Ordered ByteStrings 169 -- sort, -- :: ByteString -> ByteString 170 171 -- * Low level conversions 172 -- ** Copying ByteStrings 173 copy, -- :: ByteString -> ByteString 174 -- defrag, -- :: ByteString -> ByteString 175 176 -- * I\/O with 'ByteString's 177 178 -- ** Standard input and output 179 getContents, -- :: IO ByteString 180 putStr, -- :: ByteString -> IO () 181 putStrLn, -- :: ByteString -> IO () 182 interact, -- :: (ByteString -> ByteString) -> IO () 183 184 -- ** Files 185 readFile, -- :: FilePath -> IO ByteString 186 writeFile, -- :: FilePath -> ByteString -> IO () 187 appendFile, -- :: FilePath -> ByteString -> IO () 188 189 -- ** I\/O with Handles 190 hGetContents, -- :: Handle -> IO ByteString 191 hGet, -- :: Handle -> Int -> IO ByteString 192 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString 193 hPut, -- :: Handle -> ByteString -> IO () 194 hPutStr, -- :: Handle -> ByteString -> IO () 195 196 ) where 197 198 import qualified Prelude 199 import Prelude hiding 200 (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines 201 ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter,maximum 202 ,minimum,all,concatMap,foldl1,foldr1,scanl, scanl1, scanr, scanr1 203 ,repeat, cycle, interact, iterate,readFile,writeFile,appendFile,replicate 204 ,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem) 205 206 import qualified Data.List as L -- L for list/lazy 207 import qualified Data.ByteString as S -- S for strict (hmm...) 208 import qualified Data.ByteString.Internal as S 209 import qualified Data.ByteString.Unsafe as S 210 import Data.ByteString.Lazy.Internal 211 212 import Data.Monoid (Monoid(..)) 213 214 import Data.Word (Word8) 215 import Data.Int (Int64) 216 import System.IO (Handle,stdin,stdout,openBinaryFile,IOMode(..) 217 ,hClose,hWaitForInput,hIsEOF) 218 import System.IO.Unsafe 219 #ifndef __NHC__ 220 import Control.Exception (bracket) 221 #else 222 import IO (bracket) 223 #endif 224 225 import Foreign.ForeignPtr (withForeignPtr) 226 import Foreign.Ptr 227 import Foreign.Storable 228 229 -- ----------------------------------------------------------------------------- 230 -- 231 -- Useful macros, until we have bang patterns 232 -- 233 234 #define STRICT1(f) f a | a `seq` False = undefined 235 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined 236 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined 237 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined 238 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined 239 240 -- ----------------------------------------------------------------------------- 241 242 instance Eq ByteString 243 where (==) = eq 244 245 instance Ord ByteString 246 where compare = cmp 247 248 instance Monoid ByteString where 249 mempty = empty 250 mappend = append 251 mconcat = concat 252 253 eq :: ByteString -> ByteString -> Bool 254 eq Empty Empty = True 255 eq Empty _ = False 256 eq _ Empty = False 257 eq (Chunk a as) (Chunk b bs) = 258 case compare (S.length a) (S.length b) of 259 LT -> a == (S.take (S.length a) b) && eq as (Chunk (S.drop (S.length a) b) bs) 260 EQ -> a == b && eq as bs 261 GT -> (S.take (S.length b) a) == b && eq (Chunk (S.drop (S.length b) a) as) bs 262 263 cmp :: ByteString -> ByteString -> Ordering 264 cmp Empty Empty = EQ 265 cmp Empty _ = LT 266 cmp _ Empty = GT 267 cmp (Chunk a as) (Chunk b bs) = 268 case compare (S.length a) (S.length b) of 269 LT -> case compare a (S.take (S.length a) b) of 270 EQ -> cmp as (Chunk (S.drop (S.length a) b) bs) 271 result -> result 272 EQ -> case compare a b of 273 EQ -> cmp as bs 274 result -> result 275 GT -> case compare (S.take (S.length b) a) b of 276 EQ -> cmp (Chunk (S.drop (S.length b) a) as) bs 277 result -> result 278 279 -- ----------------------------------------------------------------------------- 280 -- Introducing and eliminating 'ByteString's 281 282 -- | /O(1)/ The empty 'ByteString' 283 empty :: ByteString 284 empty = Empty 285 {-# INLINE empty #-} 286 287 -- | /O(1)/ Convert a 'Word8' into a 'ByteString' 288 singleton :: Word8 -> ByteString 289 singleton w = Chunk (S.singleton w) Empty 290 {-# INLINE singleton #-} 291 292 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. 293 pack :: [Word8] -> ByteString 294 pack ws = L.foldr (Chunk . S.pack) Empty (chunks defaultChunkSize ws) 295 where 296 chunks :: Int -> [a] -> [[a]] 297 chunks _ [] = [] 298 chunks size xs = case L.splitAt size xs of 299 (xs', xs'') -> xs' : chunks size xs'' 300 301 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'. 302 unpack :: ByteString -> [Word8] 303 unpack cs = L.concatMap S.unpack (toChunks cs) 304 --TODO: we can do better here by integrating the concat with the unpack 305 306 -- | /O(c)/ Convert a list of strict 'ByteString' into a lazy 'ByteString' 307 fromChunks :: [S.ByteString] -> ByteString 308 fromChunks cs = L.foldr chunk Empty cs 309 310 -- | /O(n)/ Convert a lazy 'ByteString' into a list of strict 'ByteString' 311 toChunks :: ByteString -> [S.ByteString] 312 toChunks cs = foldrChunks (:) [] cs 313 314 ------------------------------------------------------------------------ 315 316 {- 317 -- | /O(n)/ Convert a '[a]' into a 'ByteString' using some 318 -- conversion function 319 packWith :: (a -> Word8) -> [a] -> ByteString 320 packWith k str = LPS $ L.map (P.packWith k) (chunk defaultChunkSize str) 321 {-# INLINE packWith #-} 322 {-# SPECIALIZE packWith :: (Char -> Word8) -> [Char] -> ByteString #-} 323 324 -- | /O(n)/ Converts a 'ByteString' to a '[a]', using a conversion function. 325 unpackWith :: (Word8 -> a) -> ByteString -> [a] 326 unpackWith k (LPS ss) = L.concatMap (S.unpackWith k) ss 327 {-# INLINE unpackWith #-} 328 {-# SPECIALIZE unpackWith :: (Word8 -> Char) -> ByteString -> [Char] #-} 329 -} 330 331 -- --------------------------------------------------------------------- 332 -- Basic interface 333 334 -- | /O(1)/ Test whether a ByteString is empty. 335 null :: ByteString -> Bool 336 null Empty = True 337 null _ = False 338 {-# INLINE null #-} 339 340 -- | /O(n\/c)/ 'length' returns the length of a ByteString as an 'Int64' 341 length :: ByteString -> Int64 342 length cs = foldlChunks (\n c -> n + fromIntegral (S.length c)) 0 cs 343 {-# INLINE length #-} 344 345 -- | /O(1)/ 'cons' is analogous to '(:)' for lists. 346 -- 347 cons :: Word8 -> ByteString -> ByteString 348 cons c cs = Chunk (S.singleton c) cs 349 {-# INLINE cons #-} 350 351 -- | /O(1)/ Unlike 'cons', 'cons\'' is 352 -- strict in the ByteString that we are consing onto. More precisely, it forces 353 -- the head and the first chunk. It does this because, for space efficiency, it 354 -- may coalesce the new byte onto the first \'chunk\' rather than starting a 355 -- new \'chunk\'. 356 -- 357 -- So that means you can't use a lazy recursive contruction like this: 358 -- 359 -- > let xs = cons\' c xs in xs 360 -- 361 -- You can however use 'cons', as well as 'repeat' and 'cycle', to build 362 -- infinite lazy ByteStrings. 363 -- 364 cons' :: Word8 -> ByteString -> ByteString 365 cons' w (Chunk c cs) | S.length c < 16 = Chunk (S.cons w c) cs 366 cons' w cs = Chunk (S.singleton w) cs 367 {-# INLINE cons' #-} 368 369 -- | /O(n\/c)/ Append a byte to the end of a 'ByteString' 370 snoc :: ByteString -> Word8 -> ByteString 371 snoc cs w = foldrChunks Chunk (singleton w) cs 372 {-# INLINE snoc #-} 373 374 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. 375 head :: ByteString -> Word8 376 head Empty = errorEmptyList "head" 377 head (Chunk c _) = S.unsafeHead c 378 {-# INLINE head #-} 379 380 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing 381 -- if it is empty. 382 uncons :: ByteString -> Maybe (Word8, ByteString) 383 uncons Empty = Nothing 384 uncons (Chunk c cs) 385 = Just (S.unsafeHead c, 386 if S.length c == 1 then cs else Chunk (S.unsafeTail c) cs) 387 {-# INLINE uncons #-} 388 389 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be 390 -- non-empty. 391 tail :: ByteString -> ByteString 392 tail Empty = errorEmptyList "tail" 393 tail (Chunk c cs) 394 | S.length c == 1 = cs 395 | otherwise = Chunk (S.unsafeTail c) cs 396 {-# INLINE tail #-} 397 398 -- | /O(n\/c)/ Extract the last element of a ByteString, which must be finite 399 -- and non-empty. 400 last :: ByteString -> Word8 401 last Empty = errorEmptyList "last" 402 last (Chunk c0 cs0) = go c0 cs0 403 where go c Empty = S.last c 404 go _ (Chunk c cs) = go c cs 405 -- XXX Don't inline this. Something breaks with 6.8.2 (haven't investigated yet) 406 407 -- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one. 408 init :: ByteString -> ByteString 409 init Empty = errorEmptyList "init" 410 init (Chunk c0 cs0) = go c0 cs0 411 where go c Empty | S.length c == 1 = Empty 412 | otherwise = Chunk (S.init c) Empty 413 go c (Chunk c' cs) = Chunk c (go c' cs) 414 415 -- | /O(n\/c)/ Append two ByteStrings 416 append :: ByteString -> ByteString -> ByteString 417 append xs ys = foldrChunks Chunk ys xs 418 {-# INLINE append #-} 419 420 -- --------------------------------------------------------------------- 421 -- Transformations 422 423 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each 424 -- element of @xs@. 425 map :: (Word8 -> Word8) -> ByteString -> ByteString 426 map f s = go s 427 where 428 go Empty = Empty 429 go (Chunk x xs) = Chunk y ys 430 where 431 y = S.map f x 432 ys = go xs 433 {-# INLINE map #-} 434 435 -- | /O(n)/ 'reverse' @xs@ returns the elements of @xs@ in reverse order. 436 reverse :: ByteString -> ByteString 437 reverse cs0 = rev Empty cs0 438 where rev a Empty = a 439 rev a (Chunk c cs) = rev (Chunk (S.reverse c) a) cs 440 {-# INLINE reverse #-} 441 442 -- | The 'intersperse' function takes a 'Word8' and a 'ByteString' and 443 -- \`intersperses\' that byte between the elements of the 'ByteString'. 444 -- It is analogous to the intersperse function on Lists. 445 intersperse :: Word8 -> ByteString -> ByteString 446 intersperse _ Empty = Empty 447 intersperse w (Chunk c cs) = Chunk (S.intersperse w c) 448 (foldrChunks (Chunk . intersperse') Empty cs) 449 where intersperse' :: S.ByteString -> S.ByteString 450 intersperse' (S.PS fp o l) = 451 S.unsafeCreate (2*l) $ \p' -> withForeignPtr fp $ \p -> do 452 poke p' w 453 S.c_intersperse (p' `plusPtr` 1) (p `plusPtr` o) (fromIntegral l) w 454 455 -- | The 'transpose' function transposes the rows and columns of its 456 -- 'ByteString' argument. 457 transpose :: [ByteString] -> [ByteString] 458 transpose css = L.map (\ss -> Chunk (S.pack ss) Empty) 459 (L.transpose (L.map unpack css)) 460 --TODO: make this fast 461 462 -- --------------------------------------------------------------------- 463 -- Reducing 'ByteString's 464 465 -- | 'foldl', applied to a binary operator, a starting value (typically 466 -- the left-identity of the operator), and a ByteString, reduces the 467 -- ByteString using the binary operator, from left to right. 468 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a 469 foldl f z = go z 470 where go a Empty = a 471 go a (Chunk c cs) = go (S.foldl f a c) cs 472 {-# INLINE foldl #-} 473 474 -- | 'foldl\'' is like 'foldl', but strict in the accumulator. 475 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a 476 foldl' f z = go z 477 where go a _ | a `seq` False = undefined 478 go a Empty = a 479 go a (Chunk c cs) = go (S.foldl f a c) cs 480 {-# INLINE foldl' #-} 481 482 -- | 'foldr', applied to a binary operator, a starting value 483 -- (typically the right-identity of the operator), and a ByteString, 484 -- reduces the ByteString using the binary operator, from right to left. 485 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a 486 foldr k z cs = foldrChunks (flip (S.foldr k)) z cs 487 {-# INLINE foldr #-} 488 489 -- | 'foldl1' is a variant of 'foldl' that has no starting value 490 -- argument, and thus must be applied to non-empty 'ByteStrings'. 491 -- This function is subject to array fusion. 492 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 493 foldl1 _ Empty = errorEmptyList "foldl1" 494 foldl1 f (Chunk c cs) = foldl f (S.unsafeHead c) (Chunk (S.unsafeTail c) cs) 495 496 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. 497 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 498 foldl1' _ Empty = errorEmptyList "foldl1'" 499 foldl1' f (Chunk c cs) = foldl' f (S.unsafeHead c) (Chunk (S.unsafeTail c) cs) 500 501 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, 502 -- and thus must be applied to non-empty 'ByteString's 503 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 504 foldr1 _ Empty = errorEmptyList "foldr1" 505 foldr1 f (Chunk c0 cs0) = go c0 cs0 506 where go c Empty = S.foldr1 f c 507 go c (Chunk c' cs) = S.foldr f (go c' cs) c 508 509 -- --------------------------------------------------------------------- 510 -- Special folds 511 512 -- | /O(n)/ Concatenate a list of ByteStrings. 513 concat :: [ByteString] -> ByteString 514 concat css0 = to css0 515 where 516 go Empty css = to css 517 go (Chunk c cs) css = Chunk c (go cs css) 518 to [] = Empty 519 to (cs:css) = go cs css 520 521 -- | Map a function over a 'ByteString' and concatenate the results 522 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString 523 concatMap _ Empty = Empty 524 concatMap f (Chunk c0 cs0) = to c0 cs0 525 where 526 go :: ByteString -> S.ByteString -> ByteString -> ByteString 527 go Empty c' cs' = to c' cs' 528 go (Chunk c cs) c' cs' = Chunk c (go cs c' cs') 529 530 to :: S.ByteString -> ByteString -> ByteString 531 to c cs | S.null c = case cs of 532 Empty -> Empty 533 (Chunk c' cs') -> to c' cs' 534 | otherwise = go (f (S.unsafeHead c)) (S.unsafeTail c) cs 535 536 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if 537 -- any element of the 'ByteString' satisfies the predicate. 538 any :: (Word8 -> Bool) -> ByteString -> Bool 539 any f cs = foldrChunks (\c rest -> S.any f c || rest) False cs 540 {-# INLINE any #-} 541 -- todo fuse 542 543 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines 544 -- if all elements of the 'ByteString' satisfy the predicate. 545 all :: (Word8 -> Bool) -> ByteString -> Bool 546 all f cs = foldrChunks (\c rest -> S.all f c && rest) True cs 547 {-# INLINE all #-} 548 -- todo fuse 549 550 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' 551 maximum :: ByteString -> Word8 552 maximum Empty = errorEmptyList "maximum" 553 maximum (Chunk c cs) = foldlChunks (\n c' -> n `max` S.maximum c') 554 (S.maximum c) cs 555 {-# INLINE maximum #-} 556 557 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' 558 minimum :: ByteString -> Word8 559 minimum Empty = errorEmptyList "minimum" 560 minimum (Chunk c cs) = foldlChunks (\n c' -> n `min` S.minimum c') 561 (S.minimum c) cs 562 {-# INLINE minimum #-} 563 564 -- | The 'mapAccumL' function behaves like a combination of 'map' and 565 -- 'foldl'; it applies a function to each element of a ByteString, 566 -- passing an accumulating parameter from left to right, and returning a 567 -- final value of this accumulator together with the new ByteString. 568 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 569 mapAccumL f s0 cs0 = go s0 cs0 570 where 571 go s Empty = (s, Empty) 572 go s (Chunk c cs) = (s'', Chunk c' cs') 573 where (s', c') = S.mapAccumL f s c 574 (s'', cs') = go s' cs 575 576 -- | The 'mapAccumR' function behaves like a combination of 'map' and 577 -- 'foldr'; it applies a function to each element of a ByteString, 578 -- passing an accumulating parameter from right to left, and returning a 579 -- final value of this accumulator together with the new ByteString. 580 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 581 mapAccumR f s0 cs0 = go s0 cs0 582 where 583 go s Empty = (s, Empty) 584 go s (Chunk c cs) = (s'', Chunk c' cs') 585 where (s'', c') = S.mapAccumR f s' c 586 (s', cs') = go s cs 587 588 -- --------------------------------------------------------------------- 589 -- Building ByteStrings 590 591 -- | 'scanl' is similar to 'foldl', but returns a list of successive 592 -- reduced values from the left. This function will fuse. 593 -- 594 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] 595 -- 596 -- Note that 597 -- 598 -- > last (scanl f z xs) == foldl f z xs. 599 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 600 scanl f z = snd . foldl k (z,singleton z) 601 where 602 k (c,acc) a = let n = f c a in (n, acc `snoc` n) 603 {-# INLINE scanl #-} 604 605 -- --------------------------------------------------------------------- 606 -- Unfolds and replicates 607 608 -- | @'iterate' f x@ returns an infinite ByteString of repeated applications 609 -- of @f@ to @x@: 610 -- 611 -- > iterate f x == [x, f x, f (f x), ...] 612 -- 613 iterate :: (Word8 -> Word8) -> Word8 -> ByteString 614 iterate f = unfoldr (\x -> case f x of x' -> x' `seq` Just (x', x')) 615 616 -- | @'repeat' x@ is an infinite ByteString, with @x@ the value of every 617 -- element. 618 -- 619 repeat :: Word8 -> ByteString 620 repeat w = cs where cs = Chunk (S.replicate smallChunkSize w) cs 621 622 -- | /O(n)/ @'replicate' n x@ is a ByteString of length @n@ with @x@ 623 -- the value of every element. 624 -- 625 replicate :: Int64 -> Word8 -> ByteString 626 replicate n w 627 | n <= 0 = Empty 628 | n < fromIntegral smallChunkSize = Chunk (S.replicate (fromIntegral n) w) Empty 629 | r == 0 = cs -- preserve invariant 630 | otherwise = Chunk (S.unsafeTake (fromIntegral r) c) cs 631 where 632 c = S.replicate smallChunkSize w 633 cs = nChunks q 634 (q, r) = quotRem n (fromIntegral smallChunkSize) 635 nChunks 0 = Empty 636 nChunks m = Chunk c (nChunks (m-1)) 637 638 -- | 'cycle' ties a finite ByteString into a circular one, or equivalently, 639 -- the infinite repetition of the original ByteString. 640 -- 641 cycle :: ByteString -> ByteString 642 cycle Empty = errorEmptyList "cycle" 643 cycle cs = cs' where cs' = foldrChunks Chunk cs' cs 644 645 -- | /O(n)/ The 'unfoldr' function is analogous to the List \'unfoldr\'. 646 -- 'unfoldr' builds a ByteString from a seed value. The function takes 647 -- the element and returns 'Nothing' if it is done producing the 648 -- ByteString or returns 'Just' @(a,b)@, in which case, @a@ is a 649 -- prepending to the ByteString and @b@ is used as the next element in a 650 -- recursive call. 651 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString 652 unfoldr f s0 = unfoldChunk 32 s0 653 where unfoldChunk n s = 654 case S.unfoldrN n f s of 655 (c, Nothing) 656 | S.null c -> Empty 657 | otherwise -> Chunk c Empty 658 (c, Just s') -> Chunk c (unfoldChunk (n*2) s') 659 660 -- --------------------------------------------------------------------- 661 -- Substrings 662 663 -- | /O(n\/c)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix 664 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. 665 take :: Int64 -> ByteString -> ByteString 666 take i _ | i <= 0 = Empty 667 take i cs0 = take' i cs0 668 where take' 0 _ = Empty 669 take' _ Empty = Empty 670 take' n (Chunk c cs) = 671 if n < fromIntegral (S.length c) 672 then Chunk (S.take (fromIntegral n) c) Empty 673 else Chunk c (take' (n - fromIntegral (S.length c)) cs) 674 675 -- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ 676 -- elements, or @[]@ if @n > 'length' xs@. 677 drop :: Int64 -> ByteString -> ByteString 678 drop i p | i <= 0 = p 679 drop i cs0 = drop' i cs0 680 where drop' 0 cs = cs 681 drop' _ Empty = Empty 682 drop' n (Chunk c cs) = 683 if n < fromIntegral (S.length c) 684 then Chunk (S.drop (fromIntegral n) c) cs 685 else drop' (n - fromIntegral (S.length c)) cs 686 687 -- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. 688 splitAt :: Int64 -> ByteString -> (ByteString, ByteString) 689 splitAt i cs0 | i <= 0 = (Empty, cs0) 690 splitAt i cs0 = splitAt' i cs0 691 where splitAt' 0 cs = (Empty, cs) 692 splitAt' _ Empty = (Empty, Empty) 693 splitAt' n (Chunk c cs) = 694 if n < fromIntegral (S.length c) 695 then (Chunk (S.take (fromIntegral n) c) Empty 696 ,Chunk (S.drop (fromIntegral n) c) cs) 697 else let (cs', cs'') = splitAt' (n - fromIntegral (S.length c)) cs 698 in (Chunk c cs', cs'') 699 700 701 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, 702 -- returns the longest prefix (possibly empty) of @xs@ of elements that 703 -- satisfy @p@. 704 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString 705 takeWhile f cs0 = takeWhile' cs0 706 where takeWhile' Empty = Empty 707 takeWhile' (Chunk c cs) = 708 case findIndexOrEnd (not . f) c of 709 0 -> Empty 710 n | n < S.length c -> Chunk (S.take n c) Empty 711 | otherwise -> Chunk c (takeWhile' cs) 712 713 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. 714 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString 715 dropWhile f cs0 = dropWhile' cs0 716 where dropWhile' Empty = Empty 717 dropWhile' (Chunk c cs) = 718 case findIndexOrEnd (not . f) c of 719 n | n < S.length c -> Chunk (S.drop n c) cs 720 | otherwise -> dropWhile' cs 721 722 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@. 723 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 724 break f cs0 = break' cs0 725 where break' Empty = (Empty, Empty) 726 break' (Chunk c cs) = 727 case findIndexOrEnd f c of 728 0 -> (Empty, Chunk c cs) 729 n | n < S.length c -> (Chunk (S.take n c) Empty 730 ,Chunk (S.drop n c) cs) 731 | otherwise -> let (cs', cs'') = break' cs 732 in (Chunk c cs', cs'') 733 734 -- 735 -- TODO 736 -- 737 -- Add rules 738 -- 739 740 {- 741 -- | 'breakByte' breaks its ByteString argument at the first occurence 742 -- of the specified byte. It is more efficient than 'break' as it is 743 -- implemented with @memchr(3)@. I.e. 744 -- 745 -- > break (=='c') "abcd" == breakByte 'c' "abcd" 746 -- 747 breakByte :: Word8 -> ByteString -> (ByteString, ByteString) 748 breakByte c (LPS ps) = case (breakByte' ps) of (a,b) -> (LPS a, LPS b) 749 where breakByte' [] = ([], []) 750 breakByte' (x:xs) = 751 case P.elemIndex c x of 752 Just 0 -> ([], x : xs) 753 Just n -> (P.take n x : [], P.drop n x : xs) 754 Nothing -> let (xs', xs'') = breakByte' xs 755 in (x : xs', xs'') 756 757 -- | 'spanByte' breaks its ByteString argument at the first 758 -- occurence of a byte other than its argument. It is more efficient 759 -- than 'span (==)' 760 -- 761 -- > span (=='c') "abcd" == spanByte 'c' "abcd" 762 -- 763 spanByte :: Word8 -> ByteString -> (ByteString, ByteString) 764 spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b) 765 where spanByte' [] = ([], []) 766 spanByte' (x:xs) = 767 case P.spanByte c x of 768 (x', x'') | P.null x' -> ([], x : xs) 769 | P.null x'' -> let (xs', xs'') = spanByte' xs 770 in (x : xs', xs'') 771 | otherwise -> (x' : [], x'' : xs) 772 -} 773 774 -- | 'span' @p xs@ breaks the ByteString into two segments. It is 775 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ 776 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 777 span p = break (not . p) 778 779 -- | /O(n)/ Splits a 'ByteString' into components delimited by 780 -- separators, where the predicate returns True for a separator element. 781 -- The resulting components do not contain the separators. Two adjacent 782 -- separators result in an empty component in the output. eg. 783 -- 784 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] 785 -- > splitWith (=='a') [] == [] 786 -- 787 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] 788 splitWith _ Empty = [] 789 splitWith p (Chunk c0 cs0) = comb [] (S.splitWith p c0) cs0 790 791 where comb :: [S.ByteString] -> [S.ByteString] -> ByteString -> [ByteString] 792 comb acc (s:[]) Empty = revChunks (s:acc) : [] 793 comb acc (s:[]) (Chunk c cs) = comb (s:acc) (S.splitWith p c) cs 794 comb acc (s:ss) cs = revChunks (s:acc) : comb [] ss cs 795 796 {-# INLINE splitWith #-} 797 798 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte 799 -- argument, consuming the delimiter. I.e. 800 -- 801 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] 802 -- > split 'a' "aXaXaXa" == ["","X","X","X",""] 803 -- > split 'x' "x" == ["",""] 804 -- 805 -- and 806 -- 807 -- > intercalate [c] . split c == id 808 -- > split == splitWith . (==) 809 -- 810 -- As for all splitting functions in this library, this function does 811 -- not copy the substrings, it just constructs new 'ByteStrings' that 812 -- are slices of the original. 813 -- 814 split :: Word8 -> ByteString -> [ByteString] 815 split _ Empty = [] 816 split w (Chunk c0 cs0) = comb [] (S.split w c0) cs0 817 818 where comb :: [S.ByteString] -> [S.ByteString] -> ByteString -> [ByteString] 819 comb acc (s:[]) Empty = revChunks (s:acc) : [] 820 comb acc (s:[]) (Chunk c cs) = comb (s:acc) (S.split w c) cs 821 comb acc (s:ss) cs = revChunks (s:acc) : comb [] ss cs 822 {-# INLINE split #-} 823 824 {- 825 -- | Like 'splitWith', except that sequences of adjacent separators are 826 -- treated as a single separator. eg. 827 -- 828 -- > tokens (=='a') "aabbaca" == ["bb","c"] 829 -- 830 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] 831 tokens f = L.filter (not.null) . splitWith f 832 -} 833 834 -- | The 'group' function takes a ByteString and returns a list of 835 -- ByteStrings such that the concatenation of the result is equal to the 836 -- argument. Moreover, each sublist in the result contains only equal 837 -- elements. For example, 838 -- 839 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] 840 -- 841 -- It is a special case of 'groupBy', which allows the programmer to 842 -- supply their own equality test. 843 group :: ByteString -> [ByteString] 844 group Empty = [] 845 group (Chunk c0 cs0) = group' [] (S.group c0) cs0 846 where 847 group' :: [S.ByteString] -> [S.ByteString] -> ByteString -> [ByteString] 848 group' acc@(s':_) ss@(s:_) cs 849 | S.unsafeHead s' 850 /= S.unsafeHead s = revNonEmptyChunks acc : group' [] ss cs 851 group' acc (s:[]) Empty = revNonEmptyChunks (s:acc) : [] 852 group' acc (s:[]) (Chunk c cs) = group' (s:acc) (S.group c) cs 853 group' acc (s:ss) cs = revNonEmptyChunks (s:acc) : group' [] ss cs 854 855 {- 856 TODO: check if something like this might be faster 857 858 group :: ByteString -> [ByteString] 859 group xs 860 | null xs = [] 861 | otherwise = ys : group zs 862 where 863 (ys, zs) = spanByte (unsafeHead xs) xs 864 -} 865 866 -- | The 'groupBy' function is the non-overloaded version of 'group'. 867 -- 868 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 869 groupBy _ Empty = [] 870 groupBy k (Chunk c0 cs0) = groupBy' [] 0 (S.groupBy k c0) cs0 871 where 872 groupBy' :: [S.ByteString] -> Word8 -> [S.ByteString] -> ByteString -> [ByteString] 873 groupBy' acc@(_:_) c ss@(s:_) cs 874 | not (c `k` S.unsafeHead s) = revNonEmptyChunks acc : groupBy' [] 0 ss cs 875 groupBy' acc _ (s:[]) Empty = revNonEmptyChunks (s : acc) : [] 876 groupBy' acc w (s:[]) (Chunk c cs) = groupBy' (s:acc) w' (S.groupBy k c) cs 877 where w' | L.null acc = S.unsafeHead s 878 | otherwise = w 879 groupBy' acc _ (s:ss) cs = revNonEmptyChunks (s : acc) : groupBy' [] 0 ss cs 880 881 {- 882 TODO: check if something like this might be faster 883 884 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 885 groupBy k xs 886 | null xs = [] 887 | otherwise = take n xs : groupBy k (drop n xs) 888 where 889 n = 1 + findIndexOrEnd (not . k (head xs)) (tail xs) 890 -} 891 892 -- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of 893 -- 'ByteString's and concatenates the list after interspersing the first 894 -- argument between each element of the list. 895 intercalate :: ByteString -> [ByteString] -> ByteString 896 intercalate s = concat . (L.intersperse s) 897 898 -- --------------------------------------------------------------------- 899 -- Indexing ByteStrings 900 901 -- | /O(c)/ 'ByteString' index (subscript) operator, starting from 0. 902 index :: ByteString -> Int64 -> Word8 903 index _ i | i < 0 = moduleError "index" ("negative index: " ++ show i) 904 index cs0 i = index' cs0 i 905 where index' Empty n = moduleError "index" ("index too large: " ++ show n) 906 index' (Chunk c cs) n 907 | n >= fromIntegral (S.length c) = 908 index' cs (n - fromIntegral (S.length c)) 909 | otherwise = S.unsafeIndex c (fromIntegral n) 910 911 -- | /O(n)/ The 'elemIndex' function returns the index of the first 912 -- element in the given 'ByteString' which is equal to the query 913 -- element, or 'Nothing' if there is no such element. 914 -- This implementation uses memchr(3). 915 elemIndex :: Word8 -> ByteString -> Maybe Int64 916 elemIndex w cs0 = elemIndex' 0 cs0 917 where elemIndex' _ Empty = Nothing 918 elemIndex' n (Chunk c cs) = 919 case S.elemIndex w c of 920 Nothing -> elemIndex' (n + fromIntegral (S.length c)) cs 921 Just i -> Just (n + fromIntegral i) 922 923 {- 924 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the 925 -- element in the given 'ByteString' which is equal to the query 926 -- element, or 'Nothing' if there is no such element. The following 927 -- holds: 928 -- 929 -- > elemIndexEnd c xs == 930 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) 931 -- 932 elemIndexEnd :: Word8 -> ByteString -> Maybe Int 933 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> 934 go (p `plusPtr` s) (l-1) 935 where 936 STRICT2(go) 937 go p i | i < 0 = return Nothing 938 | otherwise = do ch' <- peekByteOff p i 939 if ch == ch' 940 then return $ Just i 941 else go p (i-1) 942 -} 943 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning 944 -- the indices of all elements equal to the query element, in ascending order. 945 -- This implementation uses memchr(3). 946 elemIndices :: Word8 -> ByteString -> [Int64] 947 elemIndices w cs0 = elemIndices' 0 cs0 948 where elemIndices' _ Empty = [] 949 elemIndices' n (Chunk c cs) = L.map ((+n).fromIntegral) (S.elemIndices w c) 950 ++ elemIndices' (n + fromIntegral (S.length c)) cs 951 952 -- | count returns the number of times its argument appears in the ByteString 953 -- 954 -- > count = length . elemIndices 955 -- 956 -- But more efficiently than using length on the intermediate list. 957 count :: Word8 -> ByteString -> Int64 958 count w cs = foldlChunks (\n c -> n + fromIntegral (S.count w c)) 0 cs 959 960 -- | The 'findIndex' function takes a predicate and a 'ByteString' and 961 -- returns the index of the first element in the ByteString 962 -- satisfying the predicate. 963 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int64 964 findIndex k cs0 = findIndex' 0 cs0 965 where findIndex' _ Empty = Nothing 966 findIndex' n (Chunk c cs) = 967 case S.findIndex k c of 968 Nothing -> findIndex' (n + fromIntegral (S.length c)) cs 969 Just i -> Just (n + fromIntegral i) 970 {-# INLINE findIndex #-} 971 972 -- | /O(n)/ The 'find' function takes a predicate and a ByteString, 973 -- and returns the first element in matching the predicate, or 'Nothing' 974 -- if there is no such element. 975 -- 976 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing 977 -- 978 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 979 find f cs0 = find' cs0 980 where find' Empty = Nothing 981 find' (Chunk c cs) = case S.find f c of 982 Nothing -> find' cs 983 Just w -> Just w 984 {-# INLINE find #-} 985 986 -- | The 'findIndices' function extends 'findIndex', by returning the 987 -- indices of all elements satisfying the predicate, in ascending order. 988 findIndices :: (Word8 -> Bool) -> ByteString -> [Int64] 989 findIndices k cs0 = findIndices' 0 cs0 990 where findIndices' _ Empty = [] 991 findIndices' n (Chunk c cs) = L.map ((+n).fromIntegral) (S.findIndices k c) 992 ++ findIndices' (n + fromIntegral (S.length c)) cs 993 994 -- --------------------------------------------------------------------- 995 -- Searching ByteStrings 996 997 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate. 998 elem :: Word8 -> ByteString -> Bool 999 elem w cs = case elemIndex w cs of Nothing -> False ; _ -> True 1000 1001 -- | /O(n)/ 'notElem' is the inverse of 'elem' 1002 notElem :: Word8 -> ByteString -> Bool 1003 notElem w cs = not (elem w cs) 1004 1005 -- | /O(n)/ 'filter', applied to a predicate and a ByteString, 1006 -- returns a ByteString containing those characters that satisfy the 1007 -- predicate. 1008 filter :: (Word8 -> Bool) -> ByteString -> ByteString 1009 filter p s = go s 1010 where 1011 go Empty = Empty 1012 go (Chunk x xs) = chunk (S.filter p x) (go xs) 1013 {-# INLINE filter #-} 1014 1015 {- 1016 -- | /O(n)/ and /O(n\/c) space/ A first order equivalent of /filter . 1017 -- (==)/, for the common case of filtering a single byte. It is more 1018 -- efficient to use /filterByte/ in this case. 1019 -- 1020 -- > filterByte == filter . (==) 1021 -- 1022 -- filterByte is around 10x faster, and uses much less space, than its 1023 -- filter equivalent 1024 filterByte :: Word8 -> ByteString -> ByteString 1025 filterByte w ps = replicate (count w ps) w 1026 {-# INLINE filterByte #-} 1027 1028 {-# RULES 1029 "ByteString specialise filter (== x)" forall x. 1030 filter ((==) x) = filterByte x 1031 1032 "ByteString specialise filter (== x)" forall x. 1033 filter (== x) = filterByte x 1034 #-} 1035 -} 1036 1037 {- 1038 -- | /O(n)/ A first order equivalent of /filter . (\/=)/, for the common 1039 -- case of filtering a single byte out of a list. It is more efficient 1040 -- to use /filterNotByte/ in this case. 1041 -- 1042 -- > filterNotByte == filter . (/=) 1043 -- 1044 -- filterNotByte is around 2x faster than its filter equivalent. 1045 filterNotByte :: Word8 -> ByteString -> ByteString 1046 filterNotByte w (LPS xs) = LPS (filterMap (P.filterNotByte w) xs) 1047 -} 1048 1049 -- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns 1050 -- the pair of ByteStrings with elements which do and do not satisfy the 1051 -- predicate, respectively; i.e., 1052 -- 1053 -- > partition p bs == (filter p xs, filter (not . p) xs) 1054 -- 1055 partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 1056 partition f p = (filter f p, filter (not . f) p) 1057 --TODO: use a better implementation 1058 1059 -- --------------------------------------------------------------------- 1060 -- Searching for substrings 1061 1062 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True' 1063 -- iff the first is a prefix of the second. 1064 isPrefixOf :: ByteString -> ByteString -> Bool 1065 isPrefixOf Empty _ = True 1066 isPrefixOf _ Empty = False 1067 isPrefixOf (Chunk x xs) (Chunk y ys) 1068 | S.length x == S.length y = x == y && isPrefixOf xs ys 1069 | S.length x < S.length y = x == yh && isPrefixOf xs (Chunk yt ys) 1070 | otherwise = xh == y && isPrefixOf (Chunk xt xs) ys 1071 where (xh,xt) = S.splitAt (S.length y) x 1072 (yh,yt) = S.splitAt (S.length x) y 1073 1074 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' 1075 -- iff the first is a suffix of the second. 1076 -- 1077 -- The following holds: 1078 -- 1079 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y 1080 -- 1081 isSuffixOf :: ByteString -> ByteString -> Bool 1082 isSuffixOf x y = reverse x `isPrefixOf` reverse y 1083 --TODO: a better implementation 1084 1085 -- --------------------------------------------------------------------- 1086 -- Zipping 1087 1088 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of 1089 -- corresponding pairs of bytes. If one input ByteString is short, 1090 -- excess elements of the longer ByteString are discarded. This is 1091 -- equivalent to a pair of 'unpack' operations. 1092 zip :: ByteString -> ByteString -> [(Word8,Word8)] 1093 zip = zipWith (,) 1094 1095 -- | 'zipWith' generalises 'zip' by zipping with the function given as 1096 -- the first argument, instead of a tupling function. For example, 1097 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of 1098 -- corresponding sums. 1099 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] 1100 zipWith _ Empty _ = [] 1101 zipWith _ _ Empty = [] 1102 zipWith f (Chunk a as) (Chunk b bs) = go a as b bs 1103 where 1104 go x xs y ys = f (S.unsafeHead x) (S.unsafeHead y) 1105 : to (S.unsafeTail x) xs (S.unsafeTail y) ys 1106 1107 to x Empty _ _ | S.null x = [] 1108 to _ _ y Empty | S.null y = [] 1109 to x xs y ys | not (S.null x) 1110 && not (S.null y) = go x xs y ys 1111 to x xs _ (Chunk y' ys) | not (S.null x) = go x xs y' ys 1112 to _ (Chunk x' xs) y ys | not (S.null y) = go x' xs y ys 1113 to _ (Chunk x' xs) _ (Chunk y' ys) = go x' xs y' ys 1114 1115 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of 1116 -- ByteStrings. Note that this performs two 'pack' operations. 1117 unzip :: [(Word8,Word8)] -> (ByteString,ByteString) 1118 unzip ls = (pack (L.map fst ls), pack (L.map snd ls)) 1119 {-# INLINE unzip #-} 1120 1121 -- --------------------------------------------------------------------- 1122 -- Special lists 1123 1124 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first. 1125 inits :: ByteString -> [ByteString] 1126 inits = (Empty :) . inits' 1127 where inits' Empty = [] 1128 inits' (Chunk c cs) = L.map (\c' -> Chunk c' Empty) (L.tail (S.inits c)) 1129 ++ L.map (Chunk c) (inits' cs) 1130 1131 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first. 1132 tails :: ByteString -> [ByteString] 1133 tails Empty = Empty : [] 1134 tails cs@(Chunk c cs') 1135 | S.length c == 1 = cs : tails cs' 1136 | otherwise = cs : tails (Chunk (S.unsafeTail c) cs') 1137 1138 -- --------------------------------------------------------------------- 1139 -- Low level constructors 1140 1141 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage. 1142 -- This is mainly useful to allow the rest of the data pointed 1143 -- to by the 'ByteString' to be garbage collected, for example 1144 -- if a large string has been read in, and only a small part of it 1145 -- is needed in the rest of the program. 1146 copy :: ByteString -> ByteString 1147 copy cs = foldrChunks (Chunk . S.copy) Empty cs 1148 --TODO, we could coalese small blocks here 1149 --FIXME: probably not strict enough, if we're doing this to avoid retaining 1150 -- the parent blocks then we'd better copy strictly. 1151 1152 -- --------------------------------------------------------------------- 1153 1154 -- TODO defrag func that concatenates block together that are below a threshold 1155 -- defrag :: ByteString -> ByteString 1156 1157 -- --------------------------------------------------------------------- 1158 -- Lazy ByteString IO 1159 -- 1160 -- Rule for when to close: is it expected to read the whole file? 1161 -- If so, close when done. 1162 -- 1163 1164 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks 1165 -- are read on demand, in at most @k@-sized chunks. It does not block 1166 -- waiting for a whole @k@-sized chunk, so if less than @k@ bytes are 1167 -- available then they will be returned immediately as a smaller chunk. 1168 -- 1169 -- The handle is closed on EOF. 1170 -- 1171 hGetContentsN :: Int -> Handle -> IO ByteString 1172 hGetContentsN k h = lazyRead -- TODO close on exceptions 1173 where 1174 lazyRead = unsafeInterleaveIO loop 1175 1176 loop = do 1177 c <- S.hGetNonBlocking h k 1178 --TODO: I think this should distinguish EOF from no data available 1179 -- the underlying POSIX call makes this distincion, returning either 1180 -- 0 or EAGAIN 1181 if S.null c 1182 then do eof <- hIsEOF h 1183 if eof then hClose h >> return Empty 1184 else hWaitForInput h (-1) 1185 >> loop 1186 else do cs <- lazyRead 1187 return (Chunk c cs) 1188 1189 -- | Read @n@ bytes into a 'ByteString', directly from the 1190 -- specified 'Handle', in chunks of size @k@. 1191 -- 1192 hGetN :: Int -> Handle -> Int -> IO ByteString 1193 hGetN _ _ 0 = return empty 1194 hGetN k h n = readChunks n 1195 where 1196 STRICT1(readChunks) 1197 readChunks i = do 1198 c <- S.hGet h (min k i) 1199 case S.length c of 1200 0 -> return Empty 1201 m -> do cs <- readChunks (i - m) 1202 return (Chunk c cs) 1203 1204 -- | hGetNonBlockingN is similar to 'hGetContentsN', except that it will never block 1205 -- waiting for data to become available, instead it returns only whatever data 1206 -- is available. Chunks are read on demand, in @k@-sized chunks. 1207 -- 1208 hGetNonBlockingN :: Int -> Handle -> Int -> IO ByteString 1209 #if defined(__GLASGOW_HASKELL__) 1210 hGetNonBlockingN _ _ 0 = return Empty 1211 hGetNonBlockingN k h n = readChunks n 1212 where 1213 STRICT1(readChunks) 1214 readChunks i = do 1215 c <- S.hGetNonBlocking h (min k i) 1216 case S.length c of 1217 0 -> return Empty 1218 m -> do cs <- readChunks (i - m) 1219 return (Chunk c cs) 1220 #else 1221 hGetNonBlockingN = hGetN 1222 #endif 1223 1224 -- | Read entire handle contents /lazily/ into a 'ByteString'. Chunks 1225 -- are read on demand, using the default chunk size. 1226 -- 1227 -- Once EOF is encountered, the Handle is closed. 1228 -- 1229 hGetContents :: Handle -> IO ByteString 1230 hGetContents = hGetContentsN defaultChunkSize 1231 1232 -- | Read @n@ bytes into a 'ByteString', directly from the specified 'Handle'. 1233 -- 1234 hGet :: Handle -> Int -> IO ByteString 1235 hGet = hGetN defaultChunkSize 1236 1237 -- | hGetNonBlocking is similar to 'hGet', except that it will never block 1238 -- waiting for data to become available, instead it returns only whatever data 1239 -- is available. 1240 #if defined(__GLASGOW_HASKELL__) 1241 hGetNonBlocking :: Handle -> Int -> IO ByteString 1242 hGetNonBlocking = hGetNonBlockingN defaultChunkSize 1243 #else 1244 hGetNonBlocking = hGet 1245 #endif 1246 1247 -- | Read an entire file /lazily/ into a 'ByteString'. 1248 -- The Handle will be held open until EOF is encountered. 1249 -- 1250 readFile :: FilePath -> IO ByteString 1251 readFile f = openBinaryFile f ReadMode >>= hGetContents 1252 1253 -- | Write a 'ByteString' to a file. 1254 -- 1255 writeFile :: FilePath -> ByteString -> IO () 1256 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose 1257 (\hdl -> hPut hdl txt) 1258 1259 -- | Append a 'ByteString' to a file. 1260 -- 1261 appendFile :: FilePath -> ByteString -> IO () 1262 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose 1263 (\hdl -> hPut hdl txt) 1264 1265 -- | getContents. Equivalent to hGetContents stdin. Will read /lazily/ 1266 -- 1267 getContents :: IO ByteString 1268 getContents = hGetContents stdin 1269 1270 -- | Outputs a 'ByteString' to the specified 'Handle'. 1271 -- 1272 hPut :: Handle -> ByteString -> IO () 1273 hPut h cs = foldrChunks (\c rest -> S.hPut h c >> rest) (return ()) cs 1274 1275 -- | A synonym for @hPut@, for compatibility 1276 -- 1277 hPutStr :: Handle -> ByteString -> IO () 1278 hPutStr = hPut 1279 1280 -- | Write a ByteString to stdout 1281 putStr :: ByteString -> IO () 1282 putStr = hPut stdout 1283 1284 -- | Write a ByteString to stdout, appending a newline byte 1285 -- 1286 putStrLn :: ByteString -> IO () 1287 putStrLn ps = hPut stdout ps >> hPut stdout (singleton 0x0a) 1288 1289 -- | The interact function takes a function of type @ByteString -> ByteString@ 1290 -- as its argument. The entire input from the standard input device is passed 1291 -- to this function as its argument, and the resulting string is output on the 1292 -- standard output device. 1293 -- 1294 interact :: (ByteString -> ByteString) -> IO () 1295 interact transformer = putStr . transformer =<< getContents 1296 1297 -- --------------------------------------------------------------------- 1298 -- Internal utilities 1299 1300 -- Common up near identical calls to `error' to reduce the number 1301 -- constant strings created when compiled: 1302 errorEmptyList :: String -> a 1303 errorEmptyList fun = moduleError fun "empty ByteString" 1304 1305 moduleError :: String -> String -> a 1306 moduleError fun msg = error ("Data.ByteString.Lazy." ++ fun ++ ':':' ':msg) 1307 1308 1309 -- reverse a list of non-empty chunks into a lazy ByteString 1310 revNonEmptyChunks :: [S.ByteString] -> ByteString 1311 revNonEmptyChunks cs = L.foldl' (flip Chunk) Empty cs 1312 1313 -- reverse a list of possibly-empty chunks into a lazy ByteString 1314 revChunks :: [S.ByteString] -> ByteString 1315 revChunks cs = L.foldl' (flip chunk) Empty cs 1316 1317 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length 1318 -- of the string if no element is found, rather than Nothing. 1319 findIndexOrEnd :: (Word8 -> Bool) -> S.ByteString -> Int 1320 findIndexOrEnd k (S.PS x s l) = S.inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 1321 where 1322 STRICT2(go) 1323 go ptr n | n >= l = return l 1324 | otherwise = do w <- peek ptr 1325 if k w 1326 then return n 1327 else go (ptr `plusPtr` 1) (n+1) 1328 {-# INLINE findIndexOrEnd #-}