1 {-# LANGUAGE CPP #-} 2 {-# OPTIONS_GHC -XMagicHash -XUnboxedTuples #-} 3 4 -- #prune 5 6 -- | 7 -- Module : Data.ByteString 8 -- Copyright : (c) The University of Glasgow 2001, 9 -- (c) David Roundy 2003-2005, 10 -- (c) Simon Marlow 2005 11 -- (c) Bjorn Bringert 2006 12 -- (c) Don Stewart 2005-2008 13 -- 14 -- Array fusion code: 15 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller 16 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy 17 -- 18 -- License : BSD-style 19 -- 20 -- Maintainer : dons@cse.unsw.edu.au 21 -- Stability : experimental 22 -- Portability : portable 23 -- 24 -- A time and space-efficient implementation of byte vectors using 25 -- packed Word8 arrays, suitable for high performance use, both in terms 26 -- of large data quantities, or high speed requirements. Byte vectors 27 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr', 28 -- and can be passed between C and Haskell with little effort. 29 -- 30 -- This module is intended to be imported @qualified@, to avoid name 31 -- clashes with "Prelude" functions. eg. 32 -- 33 -- > import qualified Data.ByteString as B 34 -- 35 -- Original GHC implementation by Bryan O\'Sullivan. 36 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow. 37 -- Rewritten to support slices and use 'ForeignPtr' by David Roundy. 38 -- Polished and extended by Don Stewart. 39 -- 40 41 module Data.ByteString ( 42 43 -- * The @ByteString@ type 44 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid 45 46 -- * Introducing and eliminating 'ByteString's 47 empty, -- :: ByteString 48 singleton, -- :: Word8 -> ByteString 49 pack, -- :: [Word8] -> ByteString 50 unpack, -- :: ByteString -> [Word8] 51 52 -- * Basic interface 53 cons, -- :: Word8 -> ByteString -> ByteString 54 snoc, -- :: ByteString -> Word8 -> ByteString 55 append, -- :: ByteString -> ByteString -> ByteString 56 head, -- :: ByteString -> Word8 57 uncons, -- :: ByteString -> Maybe (Word8, ByteString) 58 last, -- :: ByteString -> Word8 59 tail, -- :: ByteString -> ByteString 60 init, -- :: ByteString -> ByteString 61 null, -- :: ByteString -> Bool 62 length, -- :: ByteString -> Int 63 64 -- * Transforming ByteStrings 65 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString 66 reverse, -- :: ByteString -> ByteString 67 intersperse, -- :: Word8 -> ByteString -> ByteString 68 intercalate, -- :: ByteString -> [ByteString] -> ByteString 69 transpose, -- :: [ByteString] -> [ByteString] 70 71 -- * Reducing 'ByteString's (folds) 72 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a 73 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a 74 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 75 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 76 77 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a 78 foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a 79 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 80 foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 81 82 -- ** Special folds 83 concat, -- :: [ByteString] -> ByteString 84 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString 85 any, -- :: (Word8 -> Bool) -> ByteString -> Bool 86 all, -- :: (Word8 -> Bool) -> ByteString -> Bool 87 maximum, -- :: ByteString -> Word8 88 minimum, -- :: ByteString -> Word8 89 90 -- * Building ByteStrings 91 -- ** Scans 92 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 93 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 94 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 95 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 96 97 -- ** Accumulating maps 98 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 99 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 100 101 -- ** Generating and unfolding ByteStrings 102 replicate, -- :: Int -> Word8 -> ByteString 103 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString 104 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) 105 106 -- * Substrings 107 108 -- ** Breaking strings 109 take, -- :: Int -> ByteString -> ByteString 110 drop, -- :: Int -> ByteString -> ByteString 111 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) 112 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString 113 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString 114 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 115 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 116 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 117 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 118 group, -- :: ByteString -> [ByteString] 119 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 120 inits, -- :: ByteString -> [ByteString] 121 tails, -- :: ByteString -> [ByteString] 122 123 -- ** Breaking into many substrings 124 split, -- :: Word8 -> ByteString -> [ByteString] 125 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] 126 127 -- * Predicates 128 isPrefixOf, -- :: ByteString -> ByteString -> Bool 129 isSuffixOf, -- :: ByteString -> ByteString -> Bool 130 isInfixOf, -- :: ByteString -> ByteString -> Bool 131 132 -- ** Search for arbitrary substrings 133 breakSubstring, -- :: ByteString -> ByteString -> (ByteString,ByteString) 134 findSubstring, -- :: ByteString -> ByteString -> Maybe Int 135 findSubstrings, -- :: ByteString -> ByteString -> [Int] 136 137 -- * Searching ByteStrings 138 139 -- ** Searching by equality 140 elem, -- :: Word8 -> ByteString -> Bool 141 notElem, -- :: Word8 -> ByteString -> Bool 142 143 -- ** Searching with a predicate 144 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString 146 partition, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 147 148 -- * Indexing ByteStrings 149 index, -- :: ByteString -> Int -> Word8 150 elemIndex, -- :: Word8 -> ByteString -> Maybe Int 151 elemIndices, -- :: Word8 -> ByteString -> [Int] 152 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int 153 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int 154 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int] 155 count, -- :: Word8 -> ByteString -> Int 156 157 -- * Zipping and unzipping ByteStrings 158 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] 159 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] 160 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) 161 162 -- * Ordered ByteStrings 163 sort, -- :: ByteString -> ByteString 164 165 -- * Low level conversions 166 -- ** Copying ByteStrings 167 copy, -- :: ByteString -> ByteString 168 169 -- ** Packing 'CString's and pointers 170 packCString, -- :: CString -> IO ByteString 171 packCStringLen, -- :: CStringLen -> IO ByteString 172 173 -- ** Using ByteStrings as 'CString's 174 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a 175 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a 176 177 -- * I\/O with 'ByteString's 178 179 -- ** Standard input and output 180 getLine, -- :: IO ByteString 181 getContents, -- :: IO ByteString 182 putStr, -- :: ByteString -> IO () 183 putStrLn, -- :: ByteString -> IO () 184 interact, -- :: (ByteString -> ByteString) -> IO () 185 186 -- ** Files 187 readFile, -- :: FilePath -> IO ByteString 188 writeFile, -- :: FilePath -> ByteString -> IO () 189 appendFile, -- :: FilePath -> ByteString -> IO () 190 191 -- ** I\/O with Handles 192 hGetLine, -- :: Handle -> IO ByteString 193 hGetContents, -- :: Handle -> IO ByteString 194 hGet, -- :: Handle -> Int -> IO ByteString 195 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString 196 hPut, -- :: Handle -> ByteString -> IO () 197 hPutStr, -- :: Handle -> ByteString -> IO () 198 hPutStrLn, -- :: Handle -> ByteString -> IO () 199 200 breakByte 201 202 ) where 203 204 import qualified Prelude as P 205 import Prelude hiding (reverse,head,tail,last,init,null 206 ,length,map,lines,foldl,foldr,unlines 207 ,concat,any,take,drop,splitAt,takeWhile 208 ,dropWhile,span,break,elem,filter,maximum 209 ,minimum,all,concatMap,foldl1,foldr1 210 ,scanl,scanl1,scanr,scanr1 211 ,readFile,writeFile,appendFile,replicate 212 ,getContents,getLine,putStr,putStrLn,interact 213 ,zip,zipWith,unzip,notElem) 214 215 import Data.ByteString.Internal 216 import Data.ByteString.Unsafe 217 218 import qualified Data.List as List 219 220 import Data.Word (Word8) 221 import Data.Maybe (isJust, listToMaybe) 222 223 -- Control.Exception.bracket not available in yhc or nhc 224 #ifndef __NHC__ 225 import Control.Exception (finally, bracket, assert) 226 import qualified Control.Exception as Exception 227 #else 228 import IO (bracket, finally) 229 #endif 230 import Control.Monad (when) 231 232 import Foreign.C.String (CString, CStringLen) 233 import Foreign.C.Types (CSize) 234 import Foreign.ForeignPtr 235 import Foreign.Marshal.Alloc (allocaBytes, mallocBytes, reallocBytes, finalizerFree) 236 import Foreign.Marshal.Array (allocaArray) 237 import Foreign.Ptr 238 import Foreign.Storable (Storable(..)) 239 240 -- hGetBuf and hPutBuf not available in yhc or nhc 241 import System.IO (stdin,stdout,hClose,hFileSize 242 ,hGetBuf,hPutBuf,openBinaryFile 243 ,Handle,IOMode(..)) 244 245 import Data.Monoid (Monoid, mempty, mappend, mconcat) 246 247 #if !defined(__GLASGOW_HASKELL__) 248 import System.IO.Unsafe 249 import qualified System.Environment 250 import qualified System.IO (hGetLine) 251 #endif 252 253 #if defined(__GLASGOW_HASKELL__) 254 255 import System.IO (hGetBufNonBlocking) 256 import System.IO.Error (isEOFError) 257 258 import GHC.Handle 259 import GHC.Prim (Word#, (+#), writeWord8OffAddr#) 260 import GHC.Base (build) 261 import GHC.Word hiding (Word8) 262 import GHC.Ptr (Ptr(..)) 263 import GHC.ST (ST(..)) 264 import GHC.IOBase 265 266 #endif 267 268 -- An alternative to Control.Exception (assert) for nhc98 269 #ifdef __NHC__ 270 #define assert assertS "__FILE__ : __LINE__" 271 assertS :: String -> Bool -> a -> a 272 assertS _ True = id 273 assertS s False = error ("assertion failed at "++s) 274 #endif 275 276 -- ----------------------------------------------------------------------------- 277 -- 278 -- Useful macros, until we have bang patterns 279 -- 280 281 #define STRICT1(f) f a | a `seq` False = undefined 282 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined 283 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined 284 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined 285 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined 286 287 -- ----------------------------------------------------------------------------- 288 289 instance Eq ByteString where 290 (==) = eq 291 292 instance Ord ByteString where 293 compare = compareBytes 294 295 instance Monoid ByteString where 296 mempty = empty 297 mappend = append 298 mconcat = concat 299 300 -- | /O(n)/ Equality on the 'ByteString' type. 301 eq :: ByteString -> ByteString -> Bool 302 eq a@(PS p s l) b@(PS p' s' l') 303 | l /= l' = False -- short cut on length 304 | p == p' && s == s' = True -- short cut for the same string 305 | otherwise = compareBytes a b == EQ 306 {-# INLINE eq #-} 307 -- ^ still needed 308 309 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices. 310 compareBytes :: ByteString -> ByteString -> Ordering 311 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) 312 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings 313 | otherwise = inlinePerformIO $ 314 withForeignPtr x1 $ \p1 -> 315 withForeignPtr x2 $ \p2 -> do 316 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2) 317 return $! case i `compare` 0 of 318 EQ -> l1 `compare` l2 319 x -> x 320 321 {- 322 323 -- Pure Haskell version 324 325 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) 326 -- | len1 == 0 && len2 == 0 = EQ -- short cut for empty strings 327 -- | fp1 == fp2 && off1 == off2 && len1 == len2 = EQ -- short cut for the same string 328 | otherwise = inlinePerformIO $ 329 withForeignPtr fp1 $ \p1 -> 330 withForeignPtr fp2 $ \p2 -> 331 cmp (p1 `plusPtr` off1) 332 (p2 `plusPtr` off2) 0 len1 len2 333 334 -- XXX todo. 335 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering 336 cmp p1 p2 n len1 len2 337 | n == len1 = if n == len2 then return EQ else return LT 338 | n == len2 = return GT 339 | otherwise = do 340 a <- peekByteOff p1 n :: IO Word8 341 b <- peekByteOff p2 n 342 case a `compare` b of 343 EQ -> cmp p1 p2 (n+1) len1 len2 344 LT -> return LT 345 GT -> return GT 346 -} 347 348 -- ----------------------------------------------------------------------------- 349 -- Introducing and eliminating 'ByteString's 350 351 -- | /O(1)/ The empty 'ByteString' 352 empty :: ByteString 353 empty = PS nullForeignPtr 0 0 354 355 -- | /O(1)/ Convert a 'Word8' into a 'ByteString' 356 singleton :: Word8 -> ByteString 357 singleton c = unsafeCreate 1 $ \p -> poke p c 358 {-# INLINE [1] singleton #-} 359 360 -- Inline [1] for intercalate rule 361 362 -- 363 -- XXX The use of unsafePerformIO in allocating functions (unsafeCreate) is critical! 364 -- 365 -- Otherwise: 366 -- 367 -- singleton 255 `compare` singleton 127 368 -- 369 -- is compiled to: 370 -- 371 -- case mallocByteString 2 of 372 -- ForeignPtr f internals -> 373 -- case writeWord8OffAddr# f 0 255 of _ -> 374 -- case writeWord8OffAddr# f 0 127 of _ -> 375 -- case eqAddr# f f of 376 -- False -> case compare (GHC.Prim.plusAddr# f 0) 377 -- (GHC.Prim.plusAddr# f 0) 378 -- 379 -- 380 381 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. 382 -- 383 -- For applications with large numbers of string literals, pack can be a 384 -- bottleneck. In such cases, consider using packAddress (GHC only). 385 pack :: [Word8] -> ByteString 386 387 #if !defined(__GLASGOW_HASKELL__) 388 389 pack str = unsafeCreate (P.length str) $ \p -> go p str 390 where 391 go _ [] = return () 392 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff 393 394 #else /* hack away */ 395 396 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str) 397 where 398 go _ _ [] = return () 399 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs 400 401 writeByte p i c = ST $ \s# -> 402 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #) 403 404 #endif 405 406 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'. 407 unpack :: ByteString -> [Word8] 408 409 #if !defined(__GLASGOW_HASKELL__) 410 411 unpack (PS _ _ 0) = [] 412 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> 413 go (p `plusPtr` s) (l - 1) [] 414 where 415 STRICT3(go) 416 go p 0 acc = peek p >>= \e -> return (e : acc) 417 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc) 418 {-# INLINE unpack #-} 419 420 #else 421 422 unpack ps = build (unpackFoldr ps) 423 {-# INLINE unpack #-} 424 425 -- 426 -- Have unpack fuse with good list consumers 427 -- 428 -- critical this isn't strict in the acc 429 -- as it will break in the presence of list fusion. this is a known 430 -- issue with seq and build/foldr rewrite rules, which rely on lazy 431 -- demanding to avoid bottoms in the list. 432 -- 433 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a 434 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do 435 let loop q n _ | q `seq` n `seq` False = undefined -- n.b. 436 loop _ (-1) acc = return acc 437 loop q n acc = do 438 a <- peekByteOff q n 439 loop q (n-1) (a `f` acc) 440 loop (p `plusPtr` off) (len-1) ch 441 {-# INLINE [0] unpackFoldr #-} 442 443 unpackList :: ByteString -> [Word8] 444 unpackList (PS fp off len) = withPtr fp $ \p -> do 445 let STRICT3(loop) 446 loop _ (-1) acc = return acc 447 loop q n acc = do 448 a <- peekByteOff q n 449 loop q (n-1) (a : acc) 450 loop (p `plusPtr` off) (len-1) [] 451 452 {-# RULES 453 "ByteString unpack-list" [1] forall p . 454 unpackFoldr p (:) [] = unpackList p 455 #-} 456 457 #endif 458 459 -- --------------------------------------------------------------------- 460 -- Basic interface 461 462 -- | /O(1)/ Test whether a ByteString is empty. 463 null :: ByteString -> Bool 464 null (PS _ _ l) = assert (l >= 0) $ l <= 0 465 {-# INLINE null #-} 466 467 -- --------------------------------------------------------------------- 468 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'. 469 length :: ByteString -> Int 470 length (PS _ _ l) = assert (l >= 0) $ l 471 {-# INLINE length #-} 472 473 ------------------------------------------------------------------------ 474 475 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different 476 -- complexity, as it requires a memcpy. 477 cons :: Word8 -> ByteString -> ByteString 478 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do 479 poke p c 480 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l) 481 {-# INLINE cons #-} 482 483 -- | /O(n)/ Append a byte to the end of a 'ByteString' 484 snoc :: ByteString -> Word8 -> ByteString 485 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do 486 memcpy p (f `plusPtr` s) (fromIntegral l) 487 poke (p `plusPtr` l) c 488 {-# INLINE snoc #-} 489 490 -- todo fuse 491 492 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. 493 -- An exception will be thrown in the case of an empty ByteString. 494 head :: ByteString -> Word8 495 head (PS x s l) 496 | l <= 0 = errorEmptyList "head" 497 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s 498 {-# INLINE head #-} 499 500 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. 501 -- An exception will be thrown in the case of an empty ByteString. 502 tail :: ByteString -> ByteString 503 tail (PS p s l) 504 | l <= 0 = errorEmptyList "tail" 505 | otherwise = PS p (s+1) (l-1) 506 {-# INLINE tail #-} 507 508 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing 509 -- if it is empty. 510 uncons :: ByteString -> Maybe (Word8, ByteString) 511 uncons (PS x s l) 512 | l <= 0 = Nothing 513 | otherwise = Just (inlinePerformIO $ withForeignPtr x 514 $ \p -> peekByteOff p s, 515 PS x (s+1) (l-1)) 516 {-# INLINE uncons #-} 517 518 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty. 519 -- An exception will be thrown in the case of an empty ByteString. 520 last :: ByteString -> Word8 521 last ps@(PS x s l) 522 | null ps = errorEmptyList "last" 523 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1) 524 {-# INLINE last #-} 525 526 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one. 527 -- An exception will be thrown in the case of an empty ByteString. 528 init :: ByteString -> ByteString 529 init ps@(PS p s l) 530 | null ps = errorEmptyList "init" 531 | otherwise = PS p s (l-1) 532 {-# INLINE init #-} 533 534 -- | /O(n)/ Append two ByteStrings 535 append :: ByteString -> ByteString -> ByteString 536 append xs ys | null xs = ys 537 | null ys = xs 538 | otherwise = concat [xs,ys] 539 {-# INLINE append #-} 540 541 -- --------------------------------------------------------------------- 542 -- Transformations 543 544 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each 545 -- element of @xs@. This function is subject to array fusion. 546 map :: (Word8 -> Word8) -> ByteString -> ByteString 547 map f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> 548 create len $ map_ 0 (a `plusPtr` s) 549 where 550 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO () 551 STRICT3(map_) 552 map_ n p1 p2 553 | n >= len = return () 554 | otherwise = do 555 x <- peekByteOff p1 n 556 pokeByteOff p2 n (f x) 557 map_ (n+1) p1 p2 558 {-# INLINE map #-} 559 560 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. 561 reverse :: ByteString -> ByteString 562 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> 563 c_reverse p (f `plusPtr` s) (fromIntegral l) 564 565 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a 566 -- 'ByteString' and \`intersperses\' that byte between the elements of 567 -- the 'ByteString'. It is analogous to the intersperse function on 568 -- Lists. 569 intersperse :: Word8 -> ByteString -> ByteString 570 intersperse c ps@(PS x s l) 571 | length ps < 2 = ps 572 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f -> 573 c_intersperse p (f `plusPtr` s) (fromIntegral l) c 574 575 -- | The 'transpose' function transposes the rows and columns of its 576 -- 'ByteString' argument. 577 transpose :: [ByteString] -> [ByteString] 578 transpose ps = P.map pack (List.transpose (P.map unpack ps)) 579 580 -- --------------------------------------------------------------------- 581 -- Reducing 'ByteString's 582 583 -- | 'foldl', applied to a binary operator, a starting value (typically 584 -- the left-identity of the operator), and a ByteString, reduces the 585 -- ByteString using the binary operator, from left to right. 586 -- 587 -- This function is subject to array fusion. 588 -- 589 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a 590 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 591 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 592 where 593 STRICT3(lgo) 594 lgo z p q | p == q = return z 595 | otherwise = do c <- peek p 596 lgo (f z c) (p `plusPtr` 1) q 597 {-# INLINE foldl #-} 598 599 -- | 'foldl\'' is like 'foldl', but strict in the accumulator. 600 -- However, for ByteStrings, all left folds are strict in the accumulator. 601 -- 602 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a 603 foldl' = foldl 604 {-# INLINE foldl' #-} 605 606 -- | 'foldr', applied to a binary operator, a starting value 607 -- (typically the right-identity of the operator), and a ByteString, 608 -- reduces the ByteString using the binary operator, from right to left. 609 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a 610 foldr k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 611 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1)) 612 where 613 STRICT3(go) 614 go z p q | p == q = return z 615 | otherwise = do c <- peek p 616 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive 617 {-# INLINE foldr #-} 618 619 -- | 'foldr\'' is like 'foldr', but strict in the accumulator. 620 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a 621 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 622 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1)) 623 where 624 STRICT3(go) 625 go z p q | p == q = return z 626 | otherwise = do c <- peek p 627 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive 628 {-# INLINE foldr' #-} 629 630 -- | 'foldl1' is a variant of 'foldl' that has no starting value 631 -- argument, and thus must be applied to non-empty 'ByteStrings'. 632 -- This function is subject to array fusion. 633 -- An exception will be thrown in the case of an empty ByteString. 634 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 635 foldl1 f ps 636 | null ps = errorEmptyList "foldl1" 637 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) 638 {-# INLINE foldl1 #-} 639 640 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. 641 -- An exception will be thrown in the case of an empty ByteString. 642 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 643 foldl1' f ps 644 | null ps = errorEmptyList "foldl1'" 645 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps) 646 {-# INLINE foldl1' #-} 647 648 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, 649 -- and thus must be applied to non-empty 'ByteString's 650 -- An exception will be thrown in the case of an empty ByteString. 651 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 652 foldr1 f ps 653 | null ps = errorEmptyList "foldr1" 654 | otherwise = foldr f (last ps) (init ps) 655 {-# INLINE foldr1 #-} 656 657 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the 658 -- accumulator. 659 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 660 foldr1' f ps 661 | null ps = errorEmptyList "foldr1" 662 | otherwise = foldr' f (last ps) (init ps) 663 {-# INLINE foldr1' #-} 664 665 -- --------------------------------------------------------------------- 666 -- Special folds 667 668 -- | /O(n)/ Concatenate a list of ByteStrings. 669 concat :: [ByteString] -> ByteString 670 concat [] = empty 671 concat [ps] = ps 672 concat xs = unsafeCreate len $ \ptr -> go xs ptr 673 where len = P.sum . P.map length $ xs 674 STRICT2(go) 675 go [] _ = return () 676 go (PS p s l:ps) ptr = do 677 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l) 678 go ps (ptr `plusPtr` l) 679 680 -- | Map a function over a 'ByteString' and concatenate the results 681 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString 682 concatMap f = concat . foldr ((:) . f) [] 683 684 -- foldr (append . f) empty 685 686 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if 687 -- any element of the 'ByteString' satisfies the predicate. 688 any :: (Word8 -> Bool) -> ByteString -> Bool 689 any _ (PS _ _ 0) = False 690 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 691 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 692 where 693 STRICT2(go) 694 go p q | p == q = return False 695 | otherwise = do c <- peek p 696 if f c then return True 697 else go (p `plusPtr` 1) q 698 {-# INLINE any #-} 699 700 -- todo fuse 701 702 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines 703 -- if all elements of the 'ByteString' satisfy the predicate. 704 all :: (Word8 -> Bool) -> ByteString -> Bool 705 all _ (PS _ _ 0) = True 706 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 707 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 708 where 709 STRICT2(go) 710 go p q | p == q = return True -- end of list 711 | otherwise = do c <- peek p 712 if f c 713 then go (p `plusPtr` 1) q 714 else return False 715 {-# INLINE all #-} 716 717 ------------------------------------------------------------------------ 718 719 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' 720 -- This function will fuse. 721 -- An exception will be thrown in the case of an empty ByteString. 722 maximum :: ByteString -> Word8 723 maximum xs@(PS x s l) 724 | null xs = errorEmptyList "maximum" 725 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> 726 c_maximum (p `plusPtr` s) (fromIntegral l) 727 {-# INLINE maximum #-} 728 729 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' 730 -- This function will fuse. 731 -- An exception will be thrown in the case of an empty ByteString. 732 minimum :: ByteString -> Word8 733 minimum xs@(PS x s l) 734 | null xs = errorEmptyList "minimum" 735 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> 736 c_minimum (p `plusPtr` s) (fromIntegral l) 737 {-# INLINE minimum #-} 738 739 ------------------------------------------------------------------------ 740 741 -- | The 'mapAccumL' function behaves like a combination of 'map' and 742 -- 'foldl'; it applies a function to each element of a ByteString, 743 -- passing an accumulating parameter from left to right, and returning a 744 -- final value of this accumulator together with the new list. 745 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 746 mapAccumL f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do 747 gp <- mallocByteString len 748 acc' <- withForeignPtr gp $ \p -> mapAccumL_ acc 0 (a `plusPtr` o) p 749 return $! (acc', PS gp 0 len) 750 where 751 STRICT4(mapAccumL_) 752 mapAccumL_ s n p1 p2 753 | n >= len = return s 754 | otherwise = do 755 x <- peekByteOff p1 n 756 let (s', y) = f s x 757 pokeByteOff p2 n y 758 mapAccumL_ s' (n+1) p1 p2 759 {-# INLINE mapAccumL #-} 760 761 -- | The 'mapAccumR' function behaves like a combination of 'map' and 762 -- 'foldr'; it applies a function to each element of a ByteString, 763 -- passing an accumulating parameter from right to left, and returning a 764 -- final value of this accumulator together with the new ByteString. 765 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 766 mapAccumR f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do 767 gp <- mallocByteString len 768 acc' <- withForeignPtr gp $ \p -> mapAccumR_ acc (len-1) (a `plusPtr` o) p 769 return $! (acc', PS gp 0 len) 770 where 771 STRICT4(mapAccumR_) 772 mapAccumR_ s n p q 773 | n < 0 = return s 774 | otherwise = do 775 x <- peekByteOff p n 776 let (s', y) = f s x 777 pokeByteOff q n y 778 mapAccumR_ s' (n-1) p q 779 {-# INLINE mapAccumR #-} 780 781 -- --------------------------------------------------------------------- 782 -- Building ByteStrings 783 784 -- | 'scanl' is similar to 'foldl', but returns a list of successive 785 -- reduced values from the left. This function will fuse. 786 -- 787 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] 788 -- 789 -- Note that 790 -- 791 -- > last (scanl f z xs) == foldl f z xs. 792 -- 793 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 794 795 scanl f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> 796 create (len+1) $ \q -> do 797 poke q v 798 scanl_ v 0 (a `plusPtr` s) (q `plusPtr` 1) 799 where 800 STRICT4(scanl_) 801 scanl_ z n p q 802 | n >= len = return () 803 | otherwise = do 804 x <- peekByteOff p n 805 let z' = f z x 806 pokeByteOff q n z' 807 scanl_ z' (n+1) p q 808 {-# INLINE scanl #-} 809 810 -- n.b. haskell's List scan returns a list one bigger than the 811 -- input, so we need to snoc here to get some extra space, however, 812 -- it breaks map/up fusion (i.e. scanl . map no longer fuses) 813 814 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument. 815 -- This function will fuse. 816 -- 817 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] 818 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 819 scanl1 f ps 820 | null ps = empty 821 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps) 822 {-# INLINE scanl1 #-} 823 824 -- | scanr is the right-to-left dual of scanl. 825 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 826 scanr f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> 827 create (len+1) $ \q -> do 828 poke (q `plusPtr` len) v 829 scanr_ v (len-1) (a `plusPtr` s) q 830 where 831 STRICT4(scanr_) 832 scanr_ z n p q 833 | n < 0 = return () 834 | otherwise = do 835 x <- peekByteOff p n 836 let z' = f x z 837 pokeByteOff q n z' 838 scanr_ z' (n-1) p q 839 {-# INLINE scanr #-} 840 841 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument. 842 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 843 scanr1 f ps 844 | null ps = empty 845 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions 846 {-# INLINE scanr1 #-} 847 848 -- --------------------------------------------------------------------- 849 -- Unfolds and replicates 850 851 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ 852 -- the value of every element. The following holds: 853 -- 854 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c 855 -- 856 -- This implemenation uses @memset(3)@ 857 replicate :: Int -> Word8 -> ByteString 858 replicate w c 859 | w <= 0 = empty 860 | otherwise = unsafeCreate w $ \ptr -> 861 memset ptr c (fromIntegral w) >> return () 862 863 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' 864 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a 865 -- ByteString from a seed value. The function takes the element and 866 -- returns 'Nothing' if it is done producing the ByteString or returns 867 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, 868 -- and @b@ is the seed value for further production. 869 -- 870 -- Examples: 871 -- 872 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 873 -- > == pack [0, 1, 2, 3, 4, 5] 874 -- 875 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString 876 unfoldr f = concat . unfoldChunk 32 64 877 where unfoldChunk n n' x = 878 case unfoldrN n f x of 879 (s, Nothing) -> s : [] 880 (s, Just x') -> s : unfoldChunk n' (n+n') x' 881 {-# INLINE unfoldr #-} 882 883 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed 884 -- value. However, the length of the result is limited by the first 885 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' 886 -- when the maximum length of the result is known. 887 -- 888 -- The following equation relates 'unfoldrN' and 'unfoldr': 889 -- 890 -- > unfoldrN n f s == take n (unfoldr f s) 891 -- 892 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) 893 unfoldrN i f x0 894 | i < 0 = (empty, Just x0) 895 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 896 where STRICT3(go) 897 go p x n = 898 case f x of 899 Nothing -> return (0, n, Nothing) 900 Just (w,x') 901 | n == i -> return (0, n, Just x) 902 | otherwise -> do poke p w 903 go (p `plusPtr` 1) x' (n+1) 904 {-# INLINE unfoldrN #-} 905 906 -- --------------------------------------------------------------------- 907 -- Substrings 908 909 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix 910 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. 911 take :: Int -> ByteString -> ByteString 912 take n ps@(PS x s l) 913 | n <= 0 = empty 914 | n >= l = ps 915 | otherwise = PS x s n 916 {-# INLINE take #-} 917 918 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ 919 -- elements, or @[]@ if @n > 'length' xs@. 920 drop :: Int -> ByteString -> ByteString 921 drop n ps@(PS x s l) 922 | n <= 0 = ps 923 | n >= l = empty 924 | otherwise = PS x (s+n) (l-n) 925 {-# INLINE drop #-} 926 927 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. 928 splitAt :: Int -> ByteString -> (ByteString, ByteString) 929 splitAt n ps@(PS x s l) 930 | n <= 0 = (empty, ps) 931 | n >= l = (ps, empty) 932 | otherwise = (PS x s n, PS x (s+n) (l-n)) 933 {-# INLINE splitAt #-} 934 935 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, 936 -- returns the longest prefix (possibly empty) of @xs@ of elements that 937 -- satisfy @p@. 938 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString 939 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps 940 {-# INLINE takeWhile #-} 941 942 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. 943 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString 944 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps 945 {-# INLINE dropWhile #-} 946 947 -- instead of findIndexOrEnd, we could use memchr here. 948 949 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@. 950 -- 951 -- Under GHC, a rewrite rule will transform break (==) into a 952 -- call to the specialised breakByte: 953 -- 954 -- > break ((==) x) = breakByte x 955 -- > break (==x) = breakByte x 956 -- 957 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 958 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps) 959 #if __GLASGOW_HASKELL__ 960 {-# INLINE [1] break #-} 961 #endif 962 963 {-# RULES 964 "ByteString specialise break (x==)" forall x. 965 break ((==) x) = breakByte x 966 "ByteString specialise break (==x)" forall x. 967 break (==x) = breakByte x 968 #-} 969 970 -- INTERNAL: 971 972 -- | 'breakByte' breaks its ByteString argument at the first occurence 973 -- of the specified byte. It is more efficient than 'break' as it is 974 -- implemented with @memchr(3)@. I.e. 975 -- 976 -- > break (=='c') "abcd" == breakByte 'c' "abcd" 977 -- 978 breakByte :: Word8 -> ByteString -> (ByteString, ByteString) 979 breakByte c p = case elemIndex c p of 980 Nothing -> (p,empty) 981 Just n -> (unsafeTake n p, unsafeDrop n p) 982 {-# INLINE breakByte #-} 983 984 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString' 985 -- 986 -- breakEnd p == spanEnd (not.p) 987 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 988 breakEnd p ps = splitAt (findFromEndUntil p ps) ps 989 990 -- | 'span' @p xs@ breaks the ByteString into two segments. It is 991 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ 992 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 993 span p ps = break (not . p) ps 994 #if __GLASGOW_HASKELL__ 995 {-# INLINE [1] span #-} 996 #endif 997 998 -- | 'spanByte' breaks its ByteString argument at the first 999 -- occurence of a byte other than its argument. It is more efficient 1000 -- than 'span (==)' 1001 -- 1002 -- > span (=='c') "abcd" == spanByte 'c' "abcd" 1003 -- 1004 spanByte :: Word8 -> ByteString -> (ByteString, ByteString) 1005 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> 1006 go (p `plusPtr` s) 0 1007 where 1008 STRICT2(go) 1009 go p i | i >= l = return (ps, empty) 1010 | otherwise = do c' <- peekByteOff p i 1011 if c /= c' 1012 then return (unsafeTake i ps, unsafeDrop i ps) 1013 else go p (i+1) 1014 {-# INLINE spanByte #-} 1015 1016 {-# RULES 1017 "ByteString specialise span (x==)" forall x. 1018 span ((==) x) = spanByte x 1019 "ByteString specialise span (==x)" forall x. 1020 span (==x) = spanByte x 1021 #-} 1022 1023 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'. 1024 -- We have 1025 -- 1026 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z") 1027 -- 1028 -- and 1029 -- 1030 -- > spanEnd (not . isSpace) ps 1031 -- > == 1032 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 1033 -- 1034 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 1035 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps 1036 1037 -- | /O(n)/ Splits a 'ByteString' into components delimited by 1038 -- separators, where the predicate returns True for a separator element. 1039 -- The resulting components do not contain the separators. Two adjacent 1040 -- separators result in an empty component in the output. eg. 1041 -- 1042 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] 1043 -- > splitWith (=='a') [] == [] 1044 -- 1045 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] 1046 1047 #if defined(__GLASGOW_HASKELL__) 1048 splitWith _pred (PS _ _ 0) = [] 1049 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp 1050 where pred# c# = pred_ (W8# c#) 1051 1052 STRICT4(splitWith0) 1053 splitWith0 pred' off' len' fp' = withPtr fp $ \p -> 1054 splitLoop pred' p 0 off' len' fp' 1055 1056 splitLoop :: (Word# -> Bool) 1057 -> Ptr Word8 1058 -> Int -> Int -> Int 1059 -> ForeignPtr Word8 1060 -> IO [ByteString] 1061 1062 splitLoop pred' p idx' off' len' fp' 1063 | idx' >= len' = return [PS fp' off' idx'] 1064 | otherwise = do 1065 w <- peekElemOff p (off'+idx') 1066 if pred' (case w of W8# w# -> w#) 1067 then return (PS fp' off' idx' : 1068 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp') 1069 else splitLoop pred' p (idx'+1) off' len' fp' 1070 {-# INLINE splitWith #-} 1071 1072 #else 1073 splitWith _ (PS _ _ 0) = [] 1074 splitWith p ps = loop p ps 1075 where 1076 STRICT2(loop) 1077 loop q qs = if null rest then [chunk] 1078 else chunk : loop q (unsafeTail rest) 1079 where (chunk,rest) = break q qs 1080 #endif 1081 1082 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte 1083 -- argument, consuming the delimiter. I.e. 1084 -- 1085 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] 1086 -- > split 'a' "aXaXaXa" == ["","X","X","X",""] 1087 -- > split 'x' "x" == ["",""] 1088 -- 1089 -- and 1090 -- 1091 -- > intercalate [c] . split c == id 1092 -- > split == splitWith . (==) 1093 -- 1094 -- As for all splitting functions in this library, this function does 1095 -- not copy the substrings, it just constructs new 'ByteStrings' that 1096 -- are slices of the original. 1097 -- 1098 split :: Word8 -> ByteString -> [ByteString] 1099 split _ (PS _ _ 0) = [] 1100 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do 1101 let ptr = p `plusPtr` s 1102 1103 STRICT1(loop) 1104 loop n = 1105 let q = inlinePerformIO $ memchr (ptr `plusPtr` n) 1106 w (fromIntegral (l-n)) 1107 in if q == nullPtr 1108 then [PS x (s+n) (l-n)] 1109 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1) 1110 1111 return (loop 0) 1112 {-# INLINE split #-} 1113 1114 {- 1115 -- slower. but stays inside Haskell. 1116 split _ (PS _ _ 0) = [] 1117 split (W8# w#) (PS fp off len) = splitWith' off len fp 1118 where 1119 splitWith' off' len' fp' = withPtr fp $ \p -> 1120 splitLoop p 0 off' len' fp' 1121 1122 splitLoop :: Ptr Word8 1123 -> Int -> Int -> Int 1124 -> ForeignPtr Word8 1125 -> IO [ByteString] 1126 1127 STRICT5(splitLoop) 1128 splitLoop p idx' off' len' fp' 1129 | idx' >= len' = return [PS fp' off' idx'] 1130 | otherwise = do 1131 (W8# x#) <- peekElemOff p (off'+idx') 1132 if word2Int# w# ==# word2Int# x# 1133 then return (PS fp' off' idx' : 1134 splitWith' (off'+idx'+1) (len'-idx'-1) fp') 1135 else splitLoop p (idx'+1) off' len' fp' 1136 -} 1137 1138 {- 1139 -- | Like 'splitWith', except that sequences of adjacent separators are 1140 -- treated as a single separator. eg. 1141 -- 1142 -- > tokens (=='a') "aabbaca" == ["bb","c"] 1143 -- 1144 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] 1145 tokens f = P.filter (not.null) . splitWith f 1146 {-# INLINE tokens #-} 1147 -} 1148 1149 -- | The 'group' function takes a ByteString and returns a list of 1150 -- ByteStrings such that the concatenation of the result is equal to the 1151 -- argument. Moreover, each sublist in the result contains only equal 1152 -- elements. For example, 1153 -- 1154 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] 1155 -- 1156 -- It is a special case of 'groupBy', which allows the programmer to 1157 -- supply their own equality test. It is about 40% faster than 1158 -- /groupBy (==)/ 1159 group :: ByteString -> [ByteString] 1160 group xs 1161 | null xs = [] 1162 | otherwise = ys : group zs 1163 where 1164 (ys, zs) = spanByte (unsafeHead xs) xs 1165 1166 -- | The 'groupBy' function is the non-overloaded version of 'group'. 1167 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 1168 groupBy k xs 1169 | null xs = [] 1170 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs) 1171 where 1172 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs) 1173 1174 -- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of 1175 -- 'ByteString's and concatenates the list after interspersing the first 1176 -- argument between each element of the list. 1177 intercalate :: ByteString -> [ByteString] -> ByteString 1178 intercalate s = concat . (List.intersperse s) 1179 {-# INLINE [1] intercalate #-} 1180 1181 {-# RULES 1182 "ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 . 1183 intercalate (singleton c) (s1 : s2 : []) = intercalateWithByte c s1 s2 1184 #-} 1185 1186 -- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings 1187 -- with a char. Around 4 times faster than the generalised join. 1188 -- 1189 intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString 1190 intercalateWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr -> 1191 withForeignPtr ffp $ \fp -> 1192 withForeignPtr fgp $ \gp -> do 1193 memcpy ptr (fp `plusPtr` s) (fromIntegral l) 1194 poke (ptr `plusPtr` l) c 1195 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m) 1196 where 1197 len = length f + length g + 1 1198 {-# INLINE intercalateWithByte #-} 1199 1200 -- --------------------------------------------------------------------- 1201 -- Indexing ByteStrings 1202 1203 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. 1204 index :: ByteString -> Int -> Word8 1205 index ps n 1206 | n < 0 = moduleError "index" ("negative index: " ++ show n) 1207 | n >= length ps = moduleError "index" ("index too large: " ++ show n 1208 ++ ", length = " ++ show (length ps)) 1209 | otherwise = ps `unsafeIndex` n 1210 {-# INLINE index #-} 1211 1212 -- | /O(n)/ The 'elemIndex' function returns the index of the first 1213 -- element in the given 'ByteString' which is equal to the query 1214 -- element, or 'Nothing' if there is no such element. 1215 -- This implementation uses memchr(3). 1216 elemIndex :: Word8 -> ByteString -> Maybe Int 1217 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do 1218 let p' = p `plusPtr` s 1219 q <- memchr p' c (fromIntegral l) 1220 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p' 1221 {-# INLINE elemIndex #-} 1222 1223 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the 1224 -- element in the given 'ByteString' which is equal to the query 1225 -- element, or 'Nothing' if there is no such element. The following 1226 -- holds: 1227 -- 1228 -- > elemIndexEnd c xs == 1229 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) 1230 -- 1231 elemIndexEnd :: Word8 -> ByteString -> Maybe Int 1232 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> 1233 go (p `plusPtr` s) (l-1) 1234 where 1235 STRICT2(go) 1236 go p i | i < 0 = return Nothing 1237 | otherwise = do ch' <- peekByteOff p i 1238 if ch == ch' 1239 then return $ Just i 1240 else go p (i-1) 1241 {-# INLINE elemIndexEnd #-} 1242 1243 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning 1244 -- the indices of all elements equal to the query element, in ascending order. 1245 -- This implementation uses memchr(3). 1246 elemIndices :: Word8 -> ByteString -> [Int] 1247 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do 1248 let ptr = p `plusPtr` s 1249 1250 STRICT1(loop) 1251 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n) 1252 w (fromIntegral (l - n)) 1253 in if q == nullPtr 1254 then [] 1255 else let i = q `minusPtr` ptr 1256 in i : loop (i+1) 1257 return $! loop 0 1258 {-# INLINE elemIndices #-} 1259 1260 {- 1261 -- much slower 1262 elemIndices :: Word8 -> ByteString -> [Int] 1263 elemIndices c ps = loop 0 ps 1264 where STRICT2(loop) 1265 loop _ ps' | null ps' = [] 1266 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps') 1267 | otherwise = loop (n+1) (unsafeTail ps') 1268 -} 1269 1270 -- | count returns the number of times its argument appears in the ByteString 1271 -- 1272 -- > count = length . elemIndices 1273 -- 1274 -- But more efficiently than using length on the intermediate list. 1275 count :: Word8 -> ByteString -> Int 1276 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> 1277 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w 1278 {-# INLINE count #-} 1279 1280 {- 1281 -- 1282 -- around 30% slower 1283 -- 1284 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> 1285 go (p `plusPtr` s) (fromIntegral m) 0 1286 where 1287 go :: Ptr Word8 -> CSize -> Int -> IO Int 1288 STRICT3(go) 1289 go p l i = do 1290 q <- memchr p w l 1291 if q == nullPtr 1292 then return i 1293 else do let k = fromIntegral $ q `minusPtr` p 1294 go (q `plusPtr` 1) (l-k-1) (i+1) 1295 -} 1296 1297 -- | The 'findIndex' function takes a predicate and a 'ByteString' and 1298 -- returns the index of the first element in the ByteString 1299 -- satisfying the predicate. 1300 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int 1301 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 1302 where 1303 STRICT2(go) 1304 go ptr n | n >= l = return Nothing 1305 | otherwise = do w <- peek ptr 1306 if k w 1307 then return (Just n) 1308 else go (ptr `plusPtr` 1) (n+1) 1309 {-# INLINE findIndex #-} 1310 1311 -- | The 'findIndices' function extends 'findIndex', by returning the 1312 -- indices of all elements satisfying the predicate, in ascending order. 1313 findIndices :: (Word8 -> Bool) -> ByteString -> [Int] 1314 findIndices p ps = loop 0 ps 1315 where 1316 STRICT2(loop) 1317 loop n qs | null qs = [] 1318 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs) 1319 | otherwise = loop (n+1) (unsafeTail qs) 1320 1321 -- --------------------------------------------------------------------- 1322 -- Searching ByteStrings 1323 1324 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate. 1325 elem :: Word8 -> ByteString -> Bool 1326 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True 1327 {-# INLINE elem #-} 1328 1329 -- | /O(n)/ 'notElem' is the inverse of 'elem' 1330 notElem :: Word8 -> ByteString -> Bool 1331 notElem c ps = not (elem c ps) 1332 {-# INLINE notElem #-} 1333 1334 -- | /O(n)/ 'filter', applied to a predicate and a ByteString, 1335 -- returns a ByteString containing those characters that satisfy the 1336 -- predicate. This function is subject to array fusion. 1337 filter :: (Word8 -> Bool) -> ByteString -> ByteString 1338 filter k ps@(PS x s l) 1339 | null ps = ps 1340 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do 1341 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l)) 1342 return $! t `minusPtr` p -- actual length 1343 where 1344 STRICT3(go) 1345 go f t end | f == end = return t 1346 | otherwise = do 1347 w <- peek f 1348 if k w 1349 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end 1350 else go (f `plusPtr` 1) t end 1351 {-# INLINE filter #-} 1352 1353 {- 1354 -- 1355 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common 1356 -- case of filtering a single byte. It is more efficient to use 1357 -- /filterByte/ in this case. 1358 -- 1359 -- > filterByte == filter . (==) 1360 -- 1361 -- filterByte is around 10x faster, and uses much less space, than its 1362 -- filter equivalent 1363 -- 1364 filterByte :: Word8 -> ByteString -> ByteString 1365 filterByte w ps = replicate (count w ps) w 1366 {-# INLINE filterByte #-} 1367 1368 {-# RULES 1369 "ByteString specialise filter (== x)" forall x. 1370 filter ((==) x) = filterByte x 1371 "ByteString specialise filter (== x)" forall x. 1372 filter (== x) = filterByte x 1373 #-} 1374 -} 1375 1376 -- | /O(n)/ The 'find' function takes a predicate and a ByteString, 1377 -- and returns the first element in matching the predicate, or 'Nothing' 1378 -- if there is no such element. 1379 -- 1380 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing 1381 -- 1382 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 1383 find f p = case findIndex f p of 1384 Just n -> Just (p `unsafeIndex` n) 1385 _ -> Nothing 1386 {-# INLINE find #-} 1387 1388 {- 1389 -- 1390 -- fuseable, but we don't want to walk the whole array. 1391 -- 1392 find k = foldl findEFL Nothing 1393 where findEFL a@(Just _) _ = a