1 {-# LANGUAGE CPP #-} 2 {-# OPTIONS_GHC -XMagicHash -XUnboxedTuples #-} 3 4 -- #prune 5 6 -- | 7 -- Module : Data.ByteString 8 -- Copyright : (c) The University of Glasgow 2001, 9 -- (c) David Roundy 2003-2005, 10 -- (c) Simon Marlow 2005 11 -- (c) Bjorn Bringert 2006 12 -- (c) Don Stewart 2005-2008 13 -- 14 -- Array fusion code: 15 -- (c) 2001,2002 Manuel M T Chakravarty & Gabriele Keller 16 -- (c) 2006 Manuel M T Chakravarty & Roman Leshchinskiy 17 -- 18 -- License : BSD-style 19 -- 20 -- Maintainer : dons@cse.unsw.edu.au 21 -- Stability : experimental 22 -- Portability : portable 23 -- 24 -- A time and space-efficient implementation of byte vectors using 25 -- packed Word8 arrays, suitable for high performance use, both in terms 26 -- of large data quantities, or high speed requirements. Byte vectors 27 -- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr', 28 -- and can be passed between C and Haskell with little effort. 29 -- 30 -- This module is intended to be imported @qualified@, to avoid name 31 -- clashes with "Prelude" functions. eg. 32 -- 33 -- > import qualified Data.ByteString as B 34 -- 35 -- Original GHC implementation by Bryan O\'Sullivan. 36 -- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow. 37 -- Rewritten to support slices and use 'ForeignPtr' by David Roundy. 38 -- Polished and extended by Don Stewart. 39 -- 40 41 module Data.ByteString ( 42 43 -- * The @ByteString@ type 44 ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid 45 46 -- * Introducing and eliminating 'ByteString's 47 empty, -- :: ByteString 48 singleton, -- :: Word8 -> ByteString 49 pack, -- :: [Word8] -> ByteString 50 unpack, -- :: ByteString -> [Word8] 51 52 -- * Basic interface 53 cons, -- :: Word8 -> ByteString -> ByteString 54 snoc, -- :: ByteString -> Word8 -> ByteString 55 append, -- :: ByteString -> ByteString -> ByteString 56 head, -- :: ByteString -> Word8 57 uncons, -- :: ByteString -> Maybe (Word8, ByteString) 58 last, -- :: ByteString -> Word8 59 tail, -- :: ByteString -> ByteString 60 init, -- :: ByteString -> ByteString 61 null, -- :: ByteString -> Bool 62 length, -- :: ByteString -> Int 63 64 -- * Transforming ByteStrings 65 map, -- :: (Word8 -> Word8) -> ByteString -> ByteString 66 reverse, -- :: ByteString -> ByteString 67 intersperse, -- :: Word8 -> ByteString -> ByteString 68 intercalate, -- :: ByteString -> [ByteString] -> ByteString 69 transpose, -- :: [ByteString] -> [ByteString] 70 71 -- * Reducing 'ByteString's (folds) 72 foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a 73 foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a 74 foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 75 foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 76 77 foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a 78 foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a 79 foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 80 foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 81 82 -- ** Special folds 83 concat, -- :: [ByteString] -> ByteString 84 concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString 85 any, -- :: (Word8 -> Bool) -> ByteString -> Bool 86 all, -- :: (Word8 -> Bool) -> ByteString -> Bool 87 maximum, -- :: ByteString -> Word8 88 minimum, -- :: ByteString -> Word8 89 90 -- * Building ByteStrings 91 -- ** Scans 92 scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 93 scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 94 scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 95 scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 96 97 -- ** Accumulating maps 98 mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 99 mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 100 101 -- ** Generating and unfolding ByteStrings 102 replicate, -- :: Int -> Word8 -> ByteString 103 unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString 104 unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) 105 106 -- * Substrings 107 108 -- ** Breaking strings 109 take, -- :: Int -> ByteString -> ByteString 110 drop, -- :: Int -> ByteString -> ByteString 111 splitAt, -- :: Int -> ByteString -> (ByteString, ByteString) 112 takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString 113 dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString 114 span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 115 spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 116 break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 117 breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 118 group, -- :: ByteString -> [ByteString] 119 groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 120 inits, -- :: ByteString -> [ByteString] 121 tails, -- :: ByteString -> [ByteString] 122 123 -- ** Breaking into many substrings 124 split, -- :: Word8 -> ByteString -> [ByteString] 125 splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString] 126 127 -- * Predicates 128 isPrefixOf, -- :: ByteString -> ByteString -> Bool 129 isSuffixOf, -- :: ByteString -> ByteString -> Bool 130 isInfixOf, -- :: ByteString -> ByteString -> Bool 131 132 -- ** Search for arbitrary substrings 133 breakSubstring, -- :: ByteString -> ByteString -> (ByteString,ByteString) 134 findSubstring, -- :: ByteString -> ByteString -> Maybe Int 135 findSubstrings, -- :: ByteString -> ByteString -> [Int] 136 137 -- * Searching ByteStrings 138 139 -- ** Searching by equality 140 elem, -- :: Word8 -> ByteString -> Bool 141 notElem, -- :: Word8 -> ByteString -> Bool 142 143 -- ** Searching with a predicate 144 find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8 145 filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString 146 partition, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 147 148 -- * Indexing ByteStrings 149 index, -- :: ByteString -> Int -> Word8 150 elemIndex, -- :: Word8 -> ByteString -> Maybe Int 151 elemIndices, -- :: Word8 -> ByteString -> [Int] 152 elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int 153 findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int 154 findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int] 155 count, -- :: Word8 -> ByteString -> Int 156 157 -- * Zipping and unzipping ByteStrings 158 zip, -- :: ByteString -> ByteString -> [(Word8,Word8)] 159 zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c] 160 unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString) 161 162 -- * Ordered ByteStrings 163 sort, -- :: ByteString -> ByteString 164 165 -- * Low level conversions 166 -- ** Copying ByteStrings 167 copy, -- :: ByteString -> ByteString 168 169 -- ** Packing 'CString's and pointers 170 packCString, -- :: CString -> IO ByteString 171 packCStringLen, -- :: CStringLen -> IO ByteString 172 173 -- ** Using ByteStrings as 'CString's 174 useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a 175 useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a 176 177 -- * I\/O with 'ByteString's 178 179 -- ** Standard input and output 180 getLine, -- :: IO ByteString 181 getContents, -- :: IO ByteString 182 putStr, -- :: ByteString -> IO () 183 putStrLn, -- :: ByteString -> IO () 184 interact, -- :: (ByteString -> ByteString) -> IO () 185 186 -- ** Files 187 readFile, -- :: FilePath -> IO ByteString 188 writeFile, -- :: FilePath -> ByteString -> IO () 189 appendFile, -- :: FilePath -> ByteString -> IO () 190 191 -- ** I\/O with Handles 192 hGetLine, -- :: Handle -> IO ByteString 193 hGetContents, -- :: Handle -> IO ByteString 194 hGet, -- :: Handle -> Int -> IO ByteString 195 hGetNonBlocking, -- :: Handle -> Int -> IO ByteString 196 hPut, -- :: Handle -> ByteString -> IO () 197 hPutStr, -- :: Handle -> ByteString -> IO () 198 hPutStrLn, -- :: Handle -> ByteString -> IO () 199 200 breakByte 201 202 ) where 203 204 import qualified Prelude as P 205 import Prelude hiding (reverse,head,tail,last,init,null 206 ,length,map,lines,foldl,foldr,unlines 207 ,concat,any,take,drop,splitAt,takeWhile 208 ,dropWhile,span,break,elem,filter,maximum 209 ,minimum,all,concatMap,foldl1,foldr1 210 ,scanl,scanl1,scanr,scanr1 211 ,readFile,writeFile,appendFile,replicate 212 ,getContents,getLine,putStr,putStrLn,interact 213 ,zip,zipWith,unzip,notElem) 214 215 import Data.ByteString.Internal 216 import Data.ByteString.Unsafe 217 218 import qualified Data.List as List 219 220 import Data.Word (Word8) 221 import Data.Maybe (isJust, listToMaybe) 222 223 -- Control.Exception.bracket not available in yhc or nhc 224 #ifndef __NHC__ 225 import Control.Exception (finally, bracket, assert) 226 import qualified Control.Exception as Exception 227 #else 228 import IO (bracket, finally) 229 #endif 230 import Control.Monad (when) 231 232 import Foreign.C.String (CString, CStringLen) 233 import Foreign.C.Types (CSize) 234 import Foreign.ForeignPtr 235 import Foreign.Marshal.Alloc (allocaBytes, mallocBytes, reallocBytes, finalizerFree) 236 import Foreign.Marshal.Array (allocaArray) 237 import Foreign.Ptr 238 import Foreign.Storable (Storable(..)) 239 240 -- hGetBuf and hPutBuf not available in yhc or nhc 241 import System.IO (stdin,stdout,hClose,hFileSize 242 ,hGetBuf,hPutBuf,openBinaryFile 243 ,Handle,IOMode(..)) 244 245 import Data.Monoid (Monoid, mempty, mappend, mconcat) 246 247 #if !defined(__GLASGOW_HASKELL__) 248 import System.IO.Unsafe 249 import qualified System.Environment 250 import qualified System.IO (hGetLine) 251 #endif 252 253 #if defined(__GLASGOW_HASKELL__) 254 255 import System.IO (hGetBufNonBlocking) 256 import System.IO.Error (isEOFError) 257 258 import GHC.Handle 259 import GHC.Prim (Word#, (+#), writeWord8OffAddr#) 260 import GHC.Base (build) 261 import GHC.Word hiding (Word8) 262 import GHC.Ptr (Ptr(..)) 263 import GHC.ST (ST(..)) 264 import GHC.IOBase 265 266 #endif 267 268 -- An alternative to Control.Exception (assert) for nhc98 269 #ifdef __NHC__ 270 #define assert assertS "__FILE__ : __LINE__" 271 assertS :: String -> Bool -> a -> a 272 assertS _ True = id 273 assertS s False = error ("assertion failed at "++s) 274 #endif 275 276 -- ----------------------------------------------------------------------------- 277 -- 278 -- Useful macros, until we have bang patterns 279 -- 280 281 #define STRICT1(f) f a | a `seq` False = undefined 282 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined 283 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined 284 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined 285 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined 286 287 -- ----------------------------------------------------------------------------- 288 289 instance Eq ByteString where 290 (==) = eq 291 292 instance Ord ByteString where 293 compare = compareBytes 294 295 instance Monoid ByteString where 296 mempty = empty 297 mappend = append 298 mconcat = concat 299 300 -- | /O(n)/ Equality on the 'ByteString' type. 301 eq :: ByteString -> ByteString -> Bool 302 eq a@(PS p s l) b@(PS p' s' l') 303 | l /= l' = False -- short cut on length 304 | p == p' && s == s' = True -- short cut for the same string 305 | otherwise = compareBytes a b == EQ 306 {-# INLINE eq #-} 307 -- ^ still needed 308 309 -- | /O(n)/ 'compareBytes' provides an 'Ordering' for 'ByteStrings' supporting slices. 310 compareBytes :: ByteString -> ByteString -> Ordering 311 compareBytes (PS x1 s1 l1) (PS x2 s2 l2) 312 | l1 == 0 && l2 == 0 = EQ -- short cut for empty strings 313 | otherwise = inlinePerformIO $ 314 withForeignPtr x1 $ \p1 -> 315 withForeignPtr x2 $ \p2 -> do 316 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral $ min l1 l2) 317 return $! case i `compare` 0 of 318 EQ -> l1 `compare` l2 319 x -> x 320 321 {- 322 323 -- Pure Haskell version 324 325 compareBytes (PS fp1 off1 len1) (PS fp2 off2 len2) 326 -- | len1 == 0 && len2 == 0 = EQ -- short cut for empty strings 327 -- | fp1 == fp2 && off1 == off2 && len1 == len2 = EQ -- short cut for the same string 328 | otherwise = inlinePerformIO $ 329 withForeignPtr fp1 $ \p1 -> 330 withForeignPtr fp2 $ \p2 -> 331 cmp (p1 `plusPtr` off1) 332 (p2 `plusPtr` off2) 0 len1 len2 333 334 -- XXX todo. 335 cmp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Ordering 336 cmp p1 p2 n len1 len2 337 | n == len1 = if n == len2 then return EQ else return LT 338 | n == len2 = return GT 339 | otherwise = do 340 a <- peekByteOff p1 n :: IO Word8 341 b <- peekByteOff p2 n 342 case a `compare` b of 343 EQ -> cmp p1 p2 (n+1) len1 len2 344 LT -> return LT 345 GT -> return GT 346 -} 347 348 -- ----------------------------------------------------------------------------- 349 -- Introducing and eliminating 'ByteString's 350 351 -- | /O(1)/ The empty 'ByteString' 352 empty :: ByteString 353 empty = PS nullForeignPtr 0 0 354 355 -- | /O(1)/ Convert a 'Word8' into a 'ByteString' 356 singleton :: Word8 -> ByteString 357 singleton c = unsafeCreate 1 $ \p -> poke p c 358 {-# INLINE [1] singleton #-} 359 360 -- Inline [1] for intercalate rule 361 362 -- 363 -- XXX The use of unsafePerformIO in allocating functions (unsafeCreate) is critical! 364 -- 365 -- Otherwise: 366 -- 367 -- singleton 255 `compare` singleton 127 368 -- 369 -- is compiled to: 370 -- 371 -- case mallocByteString 2 of 372 -- ForeignPtr f internals -> 373 -- case writeWord8OffAddr# f 0 255 of _ -> 374 -- case writeWord8OffAddr# f 0 127 of _ -> 375 -- case eqAddr# f f of 376 -- False -> case compare (GHC.Prim.plusAddr# f 0) 377 -- (GHC.Prim.plusAddr# f 0) 378 -- 379 -- 380 381 -- | /O(n)/ Convert a '[Word8]' into a 'ByteString'. 382 -- 383 -- For applications with large numbers of string literals, pack can be a 384 -- bottleneck. In such cases, consider using packAddress (GHC only). 385 pack :: [Word8] -> ByteString 386 387 #if !defined(__GLASGOW_HASKELL__) 388 389 pack str = unsafeCreate (P.length str) $ \p -> go p str 390 where 391 go _ [] = return () 392 go p (x:xs) = poke p x >> go (p `plusPtr` 1) xs -- less space than pokeElemOff 393 394 #else /* hack away */ 395 396 pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str) 397 where 398 go _ _ [] = return () 399 go p i (W8# c:cs) = writeByte p i c >> go p (i +# 1#) cs 400 401 writeByte p i c = ST $ \s# -> 402 case writeWord8OffAddr# p i c s# of s2# -> (# s2#, () #) 403 404 #endif 405 406 -- | /O(n)/ Converts a 'ByteString' to a '[Word8]'. 407 unpack :: ByteString -> [Word8] 408 409 #if !defined(__GLASGOW_HASKELL__) 410 411 unpack (PS _ _ 0) = [] 412 unpack (PS ps s l) = inlinePerformIO $ withForeignPtr ps $ \p -> 413 go (p `plusPtr` s) (l - 1) [] 414 where 415 STRICT3(go) 416 go p 0 acc = peek p >>= \e -> return (e : acc) 417 go p n acc = peekByteOff p n >>= \e -> go p (n-1) (e : acc) 418 {-# INLINE unpack #-} 419 420 #else 421 422 unpack ps = build (unpackFoldr ps) 423 {-# INLINE unpack #-} 424 425 -- 426 -- Have unpack fuse with good list consumers 427 -- 428 -- critical this isn't strict in the acc 429 -- as it will break in the presence of list fusion. this is a known 430 -- issue with seq and build/foldr rewrite rules, which rely on lazy 431 -- demanding to avoid bottoms in the list. 432 -- 433 unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a 434 unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do 435 let loop q n _ | q `seq` n `seq` False = undefined -- n.b. 436 loop _ (-1) acc = return acc 437 loop q n acc = do 438 a <- peekByteOff q n 439 loop q (n-1) (a `f` acc) 440 loop (p `plusPtr` off) (len-1) ch 441 {-# INLINE [0] unpackFoldr #-} 442 443 unpackList :: ByteString -> [Word8] 444 unpackList (PS fp off len) = withPtr fp $ \p -> do 445 let STRICT3(loop) 446 loop _ (-1) acc = return acc 447 loop q n acc = do 448 a <- peekByteOff q n 449 loop q (n-1) (a : acc) 450 loop (p `plusPtr` off) (len-1) [] 451 452 {-# RULES 453 "ByteString unpack-list" [1] forall p . 454 unpackFoldr p (:) [] = unpackList p 455 #-} 456 457 #endif 458 459 -- --------------------------------------------------------------------- 460 -- Basic interface 461 462 -- | /O(1)/ Test whether a ByteString is empty. 463 null :: ByteString -> Bool 464 null (PS _ _ l) = assert (l >= 0) $ l <= 0 465 {-# INLINE null #-} 466 467 -- --------------------------------------------------------------------- 468 -- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'. 469 length :: ByteString -> Int 470 length (PS _ _ l) = assert (l >= 0) $ l 471 {-# INLINE length #-} 472 473 ------------------------------------------------------------------------ 474 475 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different 476 -- complexity, as it requires a memcpy. 477 cons :: Word8 -> ByteString -> ByteString 478 cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do 479 poke p c 480 memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l) 481 {-# INLINE cons #-} 482 483 -- | /O(n)/ Append a byte to the end of a 'ByteString' 484 snoc :: ByteString -> Word8 -> ByteString 485 snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do 486 memcpy p (f `plusPtr` s) (fromIntegral l) 487 poke (p `plusPtr` l) c 488 {-# INLINE snoc #-} 489 490 -- todo fuse 491 492 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. 493 -- An exception will be thrown in the case of an empty ByteString. 494 head :: ByteString -> Word8 495 head (PS x s l) 496 | l <= 0 = errorEmptyList "head" 497 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s 498 {-# INLINE head #-} 499 500 -- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. 501 -- An exception will be thrown in the case of an empty ByteString. 502 tail :: ByteString -> ByteString 503 tail (PS p s l) 504 | l <= 0 = errorEmptyList "tail" 505 | otherwise = PS p (s+1) (l-1) 506 {-# INLINE tail #-} 507 508 -- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing 509 -- if it is empty. 510 uncons :: ByteString -> Maybe (Word8, ByteString) 511 uncons (PS x s l) 512 | l <= 0 = Nothing 513 | otherwise = Just (inlinePerformIO $ withForeignPtr x 514 $ \p -> peekByteOff p s, 515 PS x (s+1) (l-1)) 516 {-# INLINE uncons #-} 517 518 -- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty. 519 -- An exception will be thrown in the case of an empty ByteString. 520 last :: ByteString -> Word8 521 last ps@(PS x s l) 522 | null ps = errorEmptyList "last" 523 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1) 524 {-# INLINE last #-} 525 526 -- | /O(1)/ Return all the elements of a 'ByteString' except the last one. 527 -- An exception will be thrown in the case of an empty ByteString. 528 init :: ByteString -> ByteString 529 init ps@(PS p s l) 530 | null ps = errorEmptyList "init" 531 | otherwise = PS p s (l-1) 532 {-# INLINE init #-} 533 534 -- | /O(n)/ Append two ByteStrings 535 append :: ByteString -> ByteString -> ByteString 536 append xs ys | null xs = ys 537 | null ys = xs 538 | otherwise = concat [xs,ys] 539 {-# INLINE append #-} 540 541 -- --------------------------------------------------------------------- 542 -- Transformations 543 544 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each 545 -- element of @xs@. This function is subject to array fusion. 546 map :: (Word8 -> Word8) -> ByteString -> ByteString 547 map f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> 548 create len $ map_ 0 (a `plusPtr` s) 549 where 550 map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO () 551 STRICT3(map_) 552 map_ n p1 p2 553 | n >= len = return () 554 | otherwise = do 555 x <- peekByteOff p1 n 556 pokeByteOff p2 n (f x) 557 map_ (n+1) p1 p2 558 {-# INLINE map #-} 559 560 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. 561 reverse :: ByteString -> ByteString 562 reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> 563 c_reverse p (f `plusPtr` s) (fromIntegral l) 564 565 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a 566 -- 'ByteString' and \`intersperses\' that byte between the elements of 567 -- the 'ByteString'. It is analogous to the intersperse function on 568 -- Lists. 569 intersperse :: Word8 -> ByteString -> ByteString 570 intersperse c ps@(PS x s l) 571 | length ps < 2 = ps 572 | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f -> 573 c_intersperse p (f `plusPtr` s) (fromIntegral l) c 574 575 -- | The 'transpose' function transposes the rows and columns of its 576 -- 'ByteString' argument. 577 transpose :: [ByteString] -> [ByteString] 578 transpose ps = P.map pack (List.transpose (P.map unpack ps)) 579 580 -- --------------------------------------------------------------------- 581 -- Reducing 'ByteString's 582 583 -- | 'foldl', applied to a binary operator, a starting value (typically 584 -- the left-identity of the operator), and a ByteString, reduces the 585 -- ByteString using the binary operator, from left to right. 586 -- 587 -- This function is subject to array fusion. 588 -- 589 foldl :: (a -> Word8 -> a) -> a -> ByteString -> a 590 foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 591 lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 592 where 593 STRICT3(lgo) 594 lgo z p q | p == q = return z 595 | otherwise = do c <- peek p 596 lgo (f z c) (p `plusPtr` 1) q 597 {-# INLINE foldl #-} 598 599 -- | 'foldl\'' is like 'foldl', but strict in the accumulator. 600 -- However, for ByteStrings, all left folds are strict in the accumulator. 601 -- 602 foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a 603 foldl' = foldl 604 {-# INLINE foldl' #-} 605 606 -- | 'foldr', applied to a binary operator, a starting value 607 -- (typically the right-identity of the operator), and a ByteString, 608 -- reduces the ByteString using the binary operator, from right to left. 609 foldr :: (Word8 -> a -> a) -> a -> ByteString -> a 610 foldr k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 611 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1)) 612 where 613 STRICT3(go) 614 go z p q | p == q = return z 615 | otherwise = do c <- peek p 616 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive 617 {-# INLINE foldr #-} 618 619 -- | 'foldr\'' is like 'foldr', but strict in the accumulator. 620 foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a 621 foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 622 go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1)) 623 where 624 STRICT3(go) 625 go z p q | p == q = return z 626 | otherwise = do c <- peek p 627 go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive 628 {-# INLINE foldr' #-} 629 630 -- | 'foldl1' is a variant of 'foldl' that has no starting value 631 -- argument, and thus must be applied to non-empty 'ByteStrings'. 632 -- This function is subject to array fusion. 633 -- An exception will be thrown in the case of an empty ByteString. 634 foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 635 foldl1 f ps 636 | null ps = errorEmptyList "foldl1" 637 | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) 638 {-# INLINE foldl1 #-} 639 640 -- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. 641 -- An exception will be thrown in the case of an empty ByteString. 642 foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 643 foldl1' f ps 644 | null ps = errorEmptyList "foldl1'" 645 | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps) 646 {-# INLINE foldl1' #-} 647 648 -- | 'foldr1' is a variant of 'foldr' that has no starting value argument, 649 -- and thus must be applied to non-empty 'ByteString's 650 -- An exception will be thrown in the case of an empty ByteString. 651 foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 652 foldr1 f ps 653 | null ps = errorEmptyList "foldr1" 654 | otherwise = foldr f (last ps) (init ps) 655 {-# INLINE foldr1 #-} 656 657 -- | 'foldr1\'' is a variant of 'foldr1', but is strict in the 658 -- accumulator. 659 foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 660 foldr1' f ps 661 | null ps = errorEmptyList "foldr1" 662 | otherwise = foldr' f (last ps) (init ps) 663 {-# INLINE foldr1' #-} 664 665 -- --------------------------------------------------------------------- 666 -- Special folds 667 668 -- | /O(n)/ Concatenate a list of ByteStrings. 669 concat :: [ByteString] -> ByteString 670 concat [] = empty 671 concat [ps] = ps 672 concat xs = unsafeCreate len $ \ptr -> go xs ptr 673 where len = P.sum . P.map length $ xs 674 STRICT2(go) 675 go [] _ = return () 676 go (PS p s l:ps) ptr = do 677 withForeignPtr p $ \fp -> memcpy ptr (fp `plusPtr` s) (fromIntegral l) 678 go ps (ptr `plusPtr` l) 679 680 -- | Map a function over a 'ByteString' and concatenate the results 681 concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString 682 concatMap f = concat . foldr ((:) . f) [] 683 684 -- foldr (append . f) empty 685 686 -- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if 687 -- any element of the 'ByteString' satisfies the predicate. 688 any :: (Word8 -> Bool) -> ByteString -> Bool 689 any _ (PS _ _ 0) = False 690 any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 691 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 692 where 693 STRICT2(go) 694 go p q | p == q = return False 695 | otherwise = do c <- peek p 696 if f c then return True 697 else go (p `plusPtr` 1) q 698 {-# INLINE any #-} 699 700 -- todo fuse 701 702 -- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines 703 -- if all elements of the 'ByteString' satisfy the predicate. 704 all :: (Word8 -> Bool) -> ByteString -> Bool 705 all _ (PS _ _ 0) = True 706 all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 707 go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 708 where 709 STRICT2(go) 710 go p q | p == q = return True -- end of list 711 | otherwise = do c <- peek p 712 if f c 713 then go (p `plusPtr` 1) q 714 else return False 715 {-# INLINE all #-} 716 717 ------------------------------------------------------------------------ 718 719 -- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' 720 -- This function will fuse. 721 -- An exception will be thrown in the case of an empty ByteString. 722 maximum :: ByteString -> Word8 723 maximum xs@(PS x s l) 724 | null xs = errorEmptyList "maximum" 725 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> 726 c_maximum (p `plusPtr` s) (fromIntegral l) 727 {-# INLINE maximum #-} 728 729 -- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' 730 -- This function will fuse. 731 -- An exception will be thrown in the case of an empty ByteString. 732 minimum :: ByteString -> Word8 733 minimum xs@(PS x s l) 734 | null xs = errorEmptyList "minimum" 735 | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> 736 c_minimum (p `plusPtr` s) (fromIntegral l) 737 {-# INLINE minimum #-} 738 739 ------------------------------------------------------------------------ 740 741 -- | The 'mapAccumL' function behaves like a combination of 'map' and 742 -- 'foldl'; it applies a function to each element of a ByteString, 743 -- passing an accumulating parameter from left to right, and returning a 744 -- final value of this accumulator together with the new list. 745 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 746 mapAccumL f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do 747 gp <- mallocByteString len 748 acc' <- withForeignPtr gp $ \p -> mapAccumL_ acc 0 (a `plusPtr` o) p 749 return $! (acc', PS gp 0 len) 750 where 751 STRICT4(mapAccumL_) 752 mapAccumL_ s n p1 p2 753 | n >= len = return s 754 | otherwise = do 755 x <- peekByteOff p1 n 756 let (s', y) = f s x 757 pokeByteOff p2 n y 758 mapAccumL_ s' (n+1) p1 p2 759 {-# INLINE mapAccumL #-} 760 761 -- | The 'mapAccumR' function behaves like a combination of 'map' and 762 -- 'foldr'; it applies a function to each element of a ByteString, 763 -- passing an accumulating parameter from right to left, and returning a 764 -- final value of this accumulator together with the new ByteString. 765 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 766 mapAccumR f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do 767 gp <- mallocByteString len 768 acc' <- withForeignPtr gp $ \p -> mapAccumR_ acc (len-1) (a `plusPtr` o) p 769 return $! (acc', PS gp 0 len) 770 where 771 STRICT4(mapAccumR_) 772 mapAccumR_ s n p q 773 | n < 0 = return s 774 | otherwise = do 775 x <- peekByteOff p n 776 let (s', y) = f s x 777 pokeByteOff q n y 778 mapAccumR_ s' (n-1) p q 779 {-# INLINE mapAccumR #-} 780 781 -- --------------------------------------------------------------------- 782 -- Building ByteStrings 783 784 -- | 'scanl' is similar to 'foldl', but returns a list of successive 785 -- reduced values from the left. This function will fuse. 786 -- 787 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] 788 -- 789 -- Note that 790 -- 791 -- > last (scanl f z xs) == foldl f z xs. 792 -- 793 scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 794 795 scanl f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> 796 create (len+1) $ \q -> do 797 poke q v 798 scanl_ v 0 (a `plusPtr` s) (q `plusPtr` 1) 799 where 800 STRICT4(scanl_) 801 scanl_ z n p q 802 | n >= len = return () 803 | otherwise = do 804 x <- peekByteOff p n 805 let z' = f z x 806 pokeByteOff q n z' 807 scanl_ z' (n+1) p q 808 {-# INLINE scanl #-} 809 810 -- n.b. haskell's List scan returns a list one bigger than the 811 -- input, so we need to snoc here to get some extra space, however, 812 -- it breaks map/up fusion (i.e. scanl . map no longer fuses) 813 814 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument. 815 -- This function will fuse. 816 -- 817 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] 818 scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 819 scanl1 f ps 820 | null ps = empty 821 | otherwise = scanl f (unsafeHead ps) (unsafeTail ps) 822 {-# INLINE scanl1 #-} 823 824 -- | scanr is the right-to-left dual of scanl. 825 scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 826 scanr f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> 827 create (len+1) $ \q -> do 828 poke (q `plusPtr` len) v 829 scanr_ v (len-1) (a `plusPtr` s) q 830 where 831 STRICT4(scanr_) 832 scanr_ z n p q 833 | n < 0 = return () 834 | otherwise = do 835 x <- peekByteOff p n 836 let z' = f x z 837 pokeByteOff q n z' 838 scanr_ z' (n-1) p q 839 {-# INLINE scanr #-} 840 841 -- | 'scanr1' is a variant of 'scanr' that has no starting value argument. 842 scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 843 scanr1 f ps 844 | null ps = empty 845 | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions 846 {-# INLINE scanr1 #-} 847 848 -- --------------------------------------------------------------------- 849 -- Unfolds and replicates 850 851 -- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@ 852 -- the value of every element. The following holds: 853 -- 854 -- > replicate w c = unfoldr w (\u -> Just (u,u)) c 855 -- 856 -- This implemenation uses @memset(3)@ 857 replicate :: Int -> Word8 -> ByteString 858 replicate w c 859 | w <= 0 = empty 860 | otherwise = unsafeCreate w $ \ptr -> 861 memset ptr c (fromIntegral w) >> return () 862 863 -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' 864 -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a 865 -- ByteString from a seed value. The function takes the element and 866 -- returns 'Nothing' if it is done producing the ByteString or returns 867 -- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string, 868 -- and @b@ is the seed value for further production. 869 -- 870 -- Examples: 871 -- 872 -- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0 873 -- > == pack [0, 1, 2, 3, 4, 5] 874 -- 875 unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString 876 unfoldr f = concat . unfoldChunk 32 64 877 where unfoldChunk n n' x = 878 case unfoldrN n f x of 879 (s, Nothing) -> s : [] 880 (s, Just x') -> s : unfoldChunk n' (n+n') x' 881 {-# INLINE unfoldr #-} 882 883 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed 884 -- value. However, the length of the result is limited by the first 885 -- argument to 'unfoldrN'. This function is more efficient than 'unfoldr' 886 -- when the maximum length of the result is known. 887 -- 888 -- The following equation relates 'unfoldrN' and 'unfoldr': 889 -- 890 -- > unfoldrN n f s == take n (unfoldr f s) 891 -- 892 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) 893 unfoldrN i f x0 894 | i < 0 = (empty, Just x0) 895 | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 896 where STRICT3(go) 897 go p x n = 898 case f x of 899 Nothing -> return (0, n, Nothing) 900 Just (w,x') 901 | n == i -> return (0, n, Just x) 902 | otherwise -> do poke p w 903 go (p `plusPtr` 1) x' (n+1) 904 {-# INLINE unfoldrN #-} 905 906 -- --------------------------------------------------------------------- 907 -- Substrings 908 909 -- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix 910 -- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@. 911 take :: Int -> ByteString -> ByteString 912 take n ps@(PS x s l) 913 | n <= 0 = empty 914 | n >= l = ps 915 | otherwise = PS x s n 916 {-# INLINE take #-} 917 918 -- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@ 919 -- elements, or @[]@ if @n > 'length' xs@. 920 drop :: Int -> ByteString -> ByteString 921 drop n ps@(PS x s l) 922 | n <= 0 = ps 923 | n >= l = empty 924 | otherwise = PS x (s+n) (l-n) 925 {-# INLINE drop #-} 926 927 -- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@. 928 splitAt :: Int -> ByteString -> (ByteString, ByteString) 929 splitAt n ps@(PS x s l) 930 | n <= 0 = (empty, ps) 931 | n >= l = (ps, empty) 932 | otherwise = (PS x s n, PS x (s+n) (l-n)) 933 {-# INLINE splitAt #-} 934 935 -- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@, 936 -- returns the longest prefix (possibly empty) of @xs@ of elements that 937 -- satisfy @p@. 938 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString 939 takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps 940 {-# INLINE takeWhile #-} 941 942 -- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@. 943 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString 944 dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps 945 {-# INLINE dropWhile #-} 946 947 -- instead of findIndexOrEnd, we could use memchr here. 948 949 -- | 'break' @p@ is equivalent to @'span' ('not' . p)@. 950 -- 951 -- Under GHC, a rewrite rule will transform break (==) into a 952 -- call to the specialised breakByte: 953 -- 954 -- > break ((==) x) = breakByte x 955 -- > break (==x) = breakByte x 956 -- 957 break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 958 break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps) 959 #if __GLASGOW_HASKELL__ 960 {-# INLINE [1] break #-} 961 #endif 962 963 {-# RULES 964 "ByteString specialise break (x==)" forall x. 965 break ((==) x) = breakByte x 966 "ByteString specialise break (==x)" forall x. 967 break (==x) = breakByte x 968 #-} 969 970 -- INTERNAL: 971 972 -- | 'breakByte' breaks its ByteString argument at the first occurence 973 -- of the specified byte. It is more efficient than 'break' as it is 974 -- implemented with @memchr(3)@. I.e. 975 -- 976 -- > break (=='c') "abcd" == breakByte 'c' "abcd" 977 -- 978 breakByte :: Word8 -> ByteString -> (ByteString, ByteString) 979 breakByte c p = case elemIndex c p of 980 Nothing -> (p,empty) 981 Just n -> (unsafeTake n p, unsafeDrop n p) 982 {-# INLINE breakByte #-} 983 984 -- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString' 985 -- 986 -- breakEnd p == spanEnd (not.p) 987 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 988 breakEnd p ps = splitAt (findFromEndUntil p ps) ps 989 990 -- | 'span' @p xs@ breaks the ByteString into two segments. It is 991 -- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@ 992 span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 993 span p ps = break (not . p) ps 994 #if __GLASGOW_HASKELL__ 995 {-# INLINE [1] span #-} 996 #endif 997 998 -- | 'spanByte' breaks its ByteString argument at the first 999 -- occurence of a byte other than its argument. It is more efficient 1000 -- than 'span (==)' 1001 -- 1002 -- > span (=='c') "abcd" == spanByte 'c' "abcd" 1003 -- 1004 spanByte :: Word8 -> ByteString -> (ByteString, ByteString) 1005 spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> 1006 go (p `plusPtr` s) 0 1007 where 1008 STRICT2(go) 1009 go p i | i >= l = return (ps, empty) 1010 | otherwise = do c' <- peekByteOff p i 1011 if c /= c' 1012 then return (unsafeTake i ps, unsafeDrop i ps) 1013 else go p (i+1) 1014 {-# INLINE spanByte #-} 1015 1016 {-# RULES 1017 "ByteString specialise span (x==)" forall x. 1018 span ((==) x) = spanByte x 1019 "ByteString specialise span (==x)" forall x. 1020 span (==x) = spanByte x 1021 #-} 1022 1023 -- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'. 1024 -- We have 1025 -- 1026 -- > spanEnd (not.isSpace) "x y z" == ("x y ","z") 1027 -- 1028 -- and 1029 -- 1030 -- > spanEnd (not . isSpace) ps 1031 -- > == 1032 -- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x) 1033 -- 1034 spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 1035 spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps 1036 1037 -- | /O(n)/ Splits a 'ByteString' into components delimited by 1038 -- separators, where the predicate returns True for a separator element. 1039 -- The resulting components do not contain the separators. Two adjacent 1040 -- separators result in an empty component in the output. eg. 1041 -- 1042 -- > splitWith (=='a') "aabbaca" == ["","","bb","c",""] 1043 -- > splitWith (=='a') [] == [] 1044 -- 1045 splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString] 1046 1047 #if defined(__GLASGOW_HASKELL__) 1048 splitWith _pred (PS _ _ 0) = [] 1049 splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp 1050 where pred# c# = pred_ (W8# c#) 1051 1052 STRICT4(splitWith0) 1053 splitWith0 pred' off' len' fp' = withPtr fp $ \p -> 1054 splitLoop pred' p 0 off' len' fp' 1055 1056 splitLoop :: (Word# -> Bool) 1057 -> Ptr Word8 1058 -> Int -> Int -> Int 1059 -> ForeignPtr Word8 1060 -> IO [ByteString] 1061 1062 splitLoop pred' p idx' off' len' fp' 1063 | idx' >= len' = return [PS fp' off' idx'] 1064 | otherwise = do 1065 w <- peekElemOff p (off'+idx') 1066 if pred' (case w of W8# w# -> w#) 1067 then return (PS fp' off' idx' : 1068 splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp') 1069 else splitLoop pred' p (idx'+1) off' len' fp' 1070 {-# INLINE splitWith #-} 1071 1072 #else 1073 splitWith _ (PS _ _ 0) = [] 1074 splitWith p ps = loop p ps 1075 where 1076 STRICT2(loop) 1077 loop q qs = if null rest then [chunk] 1078 else chunk : loop q (unsafeTail rest) 1079 where (chunk,rest) = break q qs 1080 #endif 1081 1082 -- | /O(n)/ Break a 'ByteString' into pieces separated by the byte 1083 -- argument, consuming the delimiter. I.e. 1084 -- 1085 -- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"] 1086 -- > split 'a' "aXaXaXa" == ["","X","X","X",""] 1087 -- > split 'x' "x" == ["",""] 1088 -- 1089 -- and 1090 -- 1091 -- > intercalate [c] . split c == id 1092 -- > split == splitWith . (==) 1093 -- 1094 -- As for all splitting functions in this library, this function does 1095 -- not copy the substrings, it just constructs new 'ByteStrings' that 1096 -- are slices of the original. 1097 -- 1098 split :: Word8 -> ByteString -> [ByteString] 1099 split _ (PS _ _ 0) = [] 1100 split w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do 1101 let ptr = p `plusPtr` s 1102 1103 STRICT1(loop) 1104 loop n = 1105 let q = inlinePerformIO $ memchr (ptr `plusPtr` n) 1106 w (fromIntegral (l-n)) 1107 in if q == nullPtr 1108 then [PS x (s+n) (l-n)] 1109 else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1) 1110 1111 return (loop 0) 1112 {-# INLINE split #-} 1113 1114 {- 1115 -- slower. but stays inside Haskell. 1116 split _ (PS _ _ 0) = [] 1117 split (W8# w#) (PS fp off len) = splitWith' off len fp 1118 where 1119 splitWith' off' len' fp' = withPtr fp $ \p -> 1120 splitLoop p 0 off' len' fp' 1121 1122 splitLoop :: Ptr Word8 1123 -> Int -> Int -> Int 1124 -> ForeignPtr Word8 1125 -> IO [ByteString] 1126 1127 STRICT5(splitLoop) 1128 splitLoop p idx' off' len' fp' 1129 | idx' >= len' = return [PS fp' off' idx'] 1130 | otherwise = do 1131 (W8# x#) <- peekElemOff p (off'+idx') 1132 if word2Int# w# ==# word2Int# x# 1133 then return (PS fp' off' idx' : 1134 splitWith' (off'+idx'+1) (len'-idx'-1) fp') 1135 else splitLoop p (idx'+1) off' len' fp' 1136 -} 1137 1138 {- 1139 -- | Like 'splitWith', except that sequences of adjacent separators are 1140 -- treated as a single separator. eg. 1141 -- 1142 -- > tokens (=='a') "aabbaca" == ["bb","c"] 1143 -- 1144 tokens :: (Word8 -> Bool) -> ByteString -> [ByteString] 1145 tokens f = P.filter (not.null) . splitWith f 1146 {-# INLINE tokens #-} 1147 -} 1148 1149 -- | The 'group' function takes a ByteString and returns a list of 1150 -- ByteStrings such that the concatenation of the result is equal to the 1151 -- argument. Moreover, each sublist in the result contains only equal 1152 -- elements. For example, 1153 -- 1154 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] 1155 -- 1156 -- It is a special case of 'groupBy', which allows the programmer to 1157 -- supply their own equality test. It is about 40% faster than 1158 -- /groupBy (==)/ 1159 group :: ByteString -> [ByteString] 1160 group xs 1161 | null xs = [] 1162 | otherwise = ys : group zs 1163 where 1164 (ys, zs) = spanByte (unsafeHead xs) xs 1165 1166 -- | The 'groupBy' function is the non-overloaded version of 'group'. 1167 groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString] 1168 groupBy k xs 1169 | null xs = [] 1170 | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs) 1171 where 1172 n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs) 1173 1174 -- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of 1175 -- 'ByteString's and concatenates the list after interspersing the first 1176 -- argument between each element of the list. 1177 intercalate :: ByteString -> [ByteString] -> ByteString 1178 intercalate s = concat . (List.intersperse s) 1179 {-# INLINE [1] intercalate #-} 1180 1181 {-# RULES 1182 "ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 . 1183 intercalate (singleton c) (s1 : s2 : []) = intercalateWithByte c s1 s2 1184 #-} 1185 1186 -- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings 1187 -- with a char. Around 4 times faster than the generalised join. 1188 -- 1189 intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString 1190 intercalateWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr -> 1191 withForeignPtr ffp $ \fp -> 1192 withForeignPtr fgp $ \gp -> do 1193 memcpy ptr (fp `plusPtr` s) (fromIntegral l) 1194 poke (ptr `plusPtr` l) c 1195 memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m) 1196 where 1197 len = length f + length g + 1 1198 {-# INLINE intercalateWithByte #-} 1199 1200 -- --------------------------------------------------------------------- 1201 -- Indexing ByteStrings 1202 1203 -- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0. 1204 index :: ByteString -> Int -> Word8 1205 index ps n 1206 | n < 0 = moduleError "index" ("negative index: " ++ show n) 1207 | n >= length ps = moduleError "index" ("index too large: " ++ show n 1208 ++ ", length = " ++ show (length ps)) 1209 | otherwise = ps `unsafeIndex` n 1210 {-# INLINE index #-} 1211 1212 -- | /O(n)/ The 'elemIndex' function returns the index of the first 1213 -- element in the given 'ByteString' which is equal to the query 1214 -- element, or 'Nothing' if there is no such element. 1215 -- This implementation uses memchr(3). 1216 elemIndex :: Word8 -> ByteString -> Maybe Int 1217 elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do 1218 let p' = p `plusPtr` s 1219 q <- memchr p' c (fromIntegral l) 1220 return $! if q == nullPtr then Nothing else Just $! q `minusPtr` p' 1221 {-# INLINE elemIndex #-} 1222 1223 -- | /O(n)/ The 'elemIndexEnd' function returns the last index of the 1224 -- element in the given 'ByteString' which is equal to the query 1225 -- element, or 'Nothing' if there is no such element. The following 1226 -- holds: 1227 -- 1228 -- > elemIndexEnd c xs == 1229 -- > (-) (length xs - 1) `fmap` elemIndex c (reverse xs) 1230 -- 1231 elemIndexEnd :: Word8 -> ByteString -> Maybe Int 1232 elemIndexEnd ch (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> 1233 go (p `plusPtr` s) (l-1) 1234 where 1235 STRICT2(go) 1236 go p i | i < 0 = return Nothing 1237 | otherwise = do ch' <- peekByteOff p i 1238 if ch == ch' 1239 then return $ Just i 1240 else go p (i-1) 1241 {-# INLINE elemIndexEnd #-} 1242 1243 -- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning 1244 -- the indices of all elements equal to the query element, in ascending order. 1245 -- This implementation uses memchr(3). 1246 elemIndices :: Word8 -> ByteString -> [Int] 1247 elemIndices w (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do 1248 let ptr = p `plusPtr` s 1249 1250 STRICT1(loop) 1251 loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n) 1252 w (fromIntegral (l - n)) 1253 in if q == nullPtr 1254 then [] 1255 else let i = q `minusPtr` ptr 1256 in i : loop (i+1) 1257 return $! loop 0 1258 {-# INLINE elemIndices #-} 1259 1260 {- 1261 -- much slower 1262 elemIndices :: Word8 -> ByteString -> [Int] 1263 elemIndices c ps = loop 0 ps 1264 where STRICT2(loop) 1265 loop _ ps' | null ps' = [] 1266 loop n ps' | c == unsafeHead ps' = n : loop (n+1) (unsafeTail ps') 1267 | otherwise = loop (n+1) (unsafeTail ps') 1268 -} 1269 1270 -- | count returns the number of times its argument appears in the ByteString 1271 -- 1272 -- > count = length . elemIndices 1273 -- 1274 -- But more efficiently than using length on the intermediate list. 1275 count :: Word8 -> ByteString -> Int 1276 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> 1277 fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w 1278 {-# INLINE count #-} 1279 1280 {- 1281 -- 1282 -- around 30% slower 1283 -- 1284 count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> 1285 go (p `plusPtr` s) (fromIntegral m) 0 1286 where 1287 go :: Ptr Word8 -> CSize -> Int -> IO Int 1288 STRICT3(go) 1289 go p l i = do 1290 q <- memchr p w l 1291 if q == nullPtr 1292 then return i 1293 else do let k = fromIntegral $ q `minusPtr` p 1294 go (q `plusPtr` 1) (l-k-1) (i+1) 1295 -} 1296 1297 -- | The 'findIndex' function takes a predicate and a 'ByteString' and 1298 -- returns the index of the first element in the ByteString 1299 -- satisfying the predicate. 1300 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int 1301 findIndex k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 1302 where 1303 STRICT2(go) 1304 go ptr n | n >= l = return Nothing 1305 | otherwise = do w <- peek ptr 1306 if k w 1307 then return (Just n) 1308 else go (ptr `plusPtr` 1) (n+1) 1309 {-# INLINE findIndex #-} 1310 1311 -- | The 'findIndices' function extends 'findIndex', by returning the 1312 -- indices of all elements satisfying the predicate, in ascending order. 1313 findIndices :: (Word8 -> Bool) -> ByteString -> [Int] 1314 findIndices p ps = loop 0 ps 1315 where 1316 STRICT2(loop) 1317 loop n qs | null qs = [] 1318 | p (unsafeHead qs) = n : loop (n+1) (unsafeTail qs) 1319 | otherwise = loop (n+1) (unsafeTail qs) 1320 1321 -- --------------------------------------------------------------------- 1322 -- Searching ByteStrings 1323 1324 -- | /O(n)/ 'elem' is the 'ByteString' membership predicate. 1325 elem :: Word8 -> ByteString -> Bool 1326 elem c ps = case elemIndex c ps of Nothing -> False ; _ -> True 1327 {-# INLINE elem #-} 1328 1329 -- | /O(n)/ 'notElem' is the inverse of 'elem' 1330 notElem :: Word8 -> ByteString -> Bool 1331 notElem c ps = not (elem c ps) 1332 {-# INLINE notElem #-} 1333 1334 -- | /O(n)/ 'filter', applied to a predicate and a ByteString, 1335 -- returns a ByteString containing those characters that satisfy the 1336 -- predicate. This function is subject to array fusion. 1337 filter :: (Word8 -> Bool) -> ByteString -> ByteString 1338 filter k ps@(PS x s l) 1339 | null ps = ps 1340 | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do 1341 t <- go (f `plusPtr` s) p (f `plusPtr` (s + l)) 1342 return $! t `minusPtr` p -- actual length 1343 where 1344 STRICT3(go) 1345 go f t end | f == end = return t 1346 | otherwise = do 1347 w <- peek f 1348 if k w 1349 then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) end 1350 else go (f `plusPtr` 1) t end 1351 {-# INLINE filter #-} 1352 1353 {- 1354 -- 1355 -- | /O(n)/ A first order equivalent of /filter . (==)/, for the common 1356 -- case of filtering a single byte. It is more efficient to use 1357 -- /filterByte/ in this case. 1358 -- 1359 -- > filterByte == filter . (==) 1360 -- 1361 -- filterByte is around 10x faster, and uses much less space, than its 1362 -- filter equivalent 1363 -- 1364 filterByte :: Word8 -> ByteString -> ByteString 1365 filterByte w ps = replicate (count w ps) w 1366 {-# INLINE filterByte #-} 1367 1368 {-# RULES 1369 "ByteString specialise filter (== x)" forall x. 1370 filter ((==) x) = filterByte x 1371 "ByteString specialise filter (== x)" forall x. 1372 filter (== x) = filterByte x 1373 #-} 1374 -} 1375 1376 -- | /O(n)/ The 'find' function takes a predicate and a ByteString, 1377 -- and returns the first element in matching the predicate, or 'Nothing' 1378 -- if there is no such element. 1379 -- 1380 -- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing 1381 -- 1382 find :: (Word8 -> Bool) -> ByteString -> Maybe Word8 1383 find f p = case findIndex f p of 1384 Just n -> Just (p `unsafeIndex` n) 1385 _ -> Nothing 1386 {-# INLINE find #-} 1387 1388 {- 1389 -- 1390 -- fuseable, but we don't want to walk the whole array. 1391 -- 1392 find k = foldl findEFL Nothing 1393 where findEFL a@(Just _) _ = a 1394 findEFL _ c | k c = Just c 1395 | otherwise = Nothing 1396 -} 1397 1398 -- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns 1399 -- the pair of ByteStrings with elements which do and do not satisfy the 1400 -- predicate, respectively; i.e., 1401 -- 1402 -- > partition p bs == (filter p xs, filter (not . p) xs) 1403 -- 1404 partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) 1405 partition p bs = (filter p bs, filter (not . p) bs) 1406 --TODO: use a better implementation 1407 1408 -- --------------------------------------------------------------------- 1409 -- Searching for substrings 1410 1411 -- | /O(n)/ The 'isPrefixOf' function takes two ByteStrings and returns 'True' 1412 -- iff the first is a prefix of the second. 1413 isPrefixOf :: ByteString -> ByteString -> Bool 1414 isPrefixOf (PS x1 s1 l1) (PS x2 s2 l2) 1415 | l1 == 0 = True 1416 | l2 < l1 = False 1417 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> 1418 withForeignPtr x2 $ \p2 -> do 1419 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2) (fromIntegral l1) 1420 return $! i == 0 1421 1422 -- | /O(n)/ The 'isSuffixOf' function takes two ByteStrings and returns 'True' 1423 -- iff the first is a suffix of the second. 1424 -- 1425 -- The following holds: 1426 -- 1427 -- > isSuffixOf x y == reverse x `isPrefixOf` reverse y 1428 -- 1429 -- However, the real implemenation uses memcmp to compare the end of the 1430 -- string only, with no reverse required.. 1431 isSuffixOf :: ByteString -> ByteString -> Bool 1432 isSuffixOf (PS x1 s1 l1) (PS x2 s2 l2) 1433 | l1 == 0 = True 1434 | l2 < l1 = False 1435 | otherwise = inlinePerformIO $ withForeignPtr x1 $ \p1 -> 1436 withForeignPtr x2 $ \p2 -> do 1437 i <- memcmp (p1 `plusPtr` s1) (p2 `plusPtr` s2 `plusPtr` (l2 - l1)) (fromIntegral l1) 1438 return $! i == 0 1439 1440 -- | Check whether one string is a substring of another. @isInfixOf 1441 -- p s@ is equivalent to @not (null (findSubstrings p s))@. 1442 isInfixOf :: ByteString -> ByteString -> Bool 1443 isInfixOf p s = isJust (findSubstring p s) 1444 1445 -- | Break a string on a substring, returning a pair of the part of the 1446 -- string prior to the match, and the rest of the string. 1447 -- 1448 -- The following relationships hold: 1449 -- 1450 -- > break (== c) l == breakSubstring (singleton c) l 1451 -- 1452 -- and: 1453 -- 1454 -- > findSubstring s l == 1455 -- > if null s then Just 0 1456 -- > else case breakSubstring s l of 1457 -- > (x,y) | null y -> Nothing 1458 -- > | otherwise -> Just (length x) 1459 -- 1460 -- For example, to tokenise a string, dropping delimiters: 1461 -- 1462 -- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t) 1463 -- > where (h,t) = breakSubstring x y 1464 -- 1465 -- To skip to the first occurence of a string: 1466 -- 1467 -- > snd (breakSubstring x y) 1468 -- 1469 -- To take the parts of a string before a delimiter: 1470 -- 1471 -- > fst (breakSubstring x y) 1472 -- 1473 breakSubstring :: ByteString -- ^ String to search for 1474 -> ByteString -- ^ String to search in 1475 -> (ByteString,ByteString) -- ^ Head and tail of string broken at substring 1476 1477 breakSubstring pat src = search 0 src 1478 where 1479 STRICT2(search) 1480 search n s 1481 | null s = (src,empty) -- not found 1482 | pat `isPrefixOf` s = (take n src,s) 1483 | otherwise = search (n+1) (unsafeTail s) 1484 1485 -- | Get the first index of a substring in another string, 1486 -- or 'Nothing' if the string is not found. 1487 -- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@. 1488 findSubstring :: ByteString -- ^ String to search for. 1489 -> ByteString -- ^ String to seach in. 1490 -> Maybe Int 1491 findSubstring f i = listToMaybe (findSubstrings f i) 1492 1493 {-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-} 1494 1495 {- 1496 findSubstring pat str = search 0 str 1497 where 1498 STRICT2(search) 1499 search n s 1500 = let x = pat `isPrefixOf` s 1501 in 1502 if null s 1503 then if x then Just n else Nothing 1504 else if x then Just n 1505 else search (n+1) (unsafeTail s) 1506 -} 1507 1508 -- | Find the indexes of all (possibly overlapping) occurances of a 1509 -- substring in a string. 1510 -- 1511 findSubstrings :: ByteString -- ^ String to search for. 1512 -> ByteString -- ^ String to seach in. 1513 -> [Int] 1514 findSubstrings pat str 1515 | null pat = [0 .. length str] 1516 | otherwise = search 0 str 1517 where 1518 STRICT2(search) 1519 search n s 1520 | null s = [] 1521 | pat `isPrefixOf` s = n : search (n+1) (unsafeTail s) 1522 | otherwise = search (n+1) (unsafeTail s) 1523 1524 {-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-} 1525 1526 {- 1527 {- This function uses the Knuth-Morris-Pratt string matching algorithm. -} 1528 1529 findSubstrings pat@(PS _ _ m) str@(PS _ _ n) = search 0 0 1530 where 1531 patc x = pat `unsafeIndex` x 1532 strc x = str `unsafeIndex` x 1533 1534 -- maybe we should make kmpNext a UArray before using it in search? 1535 kmpNext = listArray (0,m) (-1:kmpNextL pat (-1)) 1536 kmpNextL p _ | null p = [] 1537 kmpNextL p j = let j' = next (unsafeHead p) j + 1 1538 ps = unsafeTail p 1539 x = if not (null ps) && unsafeHead ps == patc j' 1540 then kmpNext Array.! j' else j' 1541 in x:kmpNextL ps j' 1542 search i j = match ++ rest -- i: position in string, j: position in pattern 1543 where match = if j == m then [(i - j)] else [] 1544 rest = if i == n then [] else search (i+1) (next (strc i) j + 1) 1545 next c j | j >= 0 && (j == m || c /= patc j) = next c (kmpNext Array.! j) 1546 | otherwise = j 1547 -} 1548 1549 -- --------------------------------------------------------------------- 1550 -- Zipping 1551 1552 -- | /O(n)/ 'zip' takes two ByteStrings and returns a list of 1553 -- corresponding pairs of bytes. If one input ByteString is short, 1554 -- excess elements of the longer ByteString are discarded. This is 1555 -- equivalent to a pair of 'unpack' operations. 1556 zip :: ByteString -> ByteString -> [(Word8,Word8)] 1557 zip ps qs 1558 | null ps || null qs = [] 1559 | otherwise = (unsafeHead ps, unsafeHead qs) : zip (unsafeTail ps) (unsafeTail qs) 1560 1561 -- | 'zipWith' generalises 'zip' by zipping with the function given as 1562 -- the first argument, instead of a tupling function. For example, 1563 -- @'zipWith' (+)@ is applied to two ByteStrings to produce the list of 1564 -- corresponding sums. 1565 zipWith :: (Word8 -> Word8 -> a) -> ByteString -> ByteString -> [a] 1566 zipWith f ps qs 1567 | null ps || null qs = [] 1568 | otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs) 1569 #if defined(__GLASGOW_HASKELL__) 1570 {-# INLINE [1] zipWith #-} 1571 #endif 1572 1573 -- 1574 -- | A specialised version of zipWith for the common case of a 1575 -- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules 1576 -- are used to automatically covert zipWith into zipWith' when a pack is 1577 -- performed on the result of zipWith. 1578 -- 1579 zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString 1580 zipWith' f (PS fp s l) (PS fq t m) = inlinePerformIO $ 1581 withForeignPtr fp $ \a -> 1582 withForeignPtr fq $ \b -> 1583 create len $ zipWith_ 0 (a `plusPtr` s) (b `plusPtr` t) 1584 where 1585 zipWith_ :: Int -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO () 1586 STRICT4(zipWith_) 1587 zipWith_ n p1 p2 r 1588 | n >= len = return () 1589 | otherwise = do 1590 x <- peekByteOff p1 n 1591 y <- peekByteOff p2 n 1592 pokeByteOff r n (f x y) 1593 zipWith_ (n+1) p1 p2 r 1594 1595 len = min l m 1596 {-# INLINE zipWith' #-} 1597 1598 {-# RULES 1599 "ByteString specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q . 1600 zipWith f p q = unpack (zipWith' f p q) 1601 #-} 1602 1603 -- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of 1604 -- ByteStrings. Note that this performs two 'pack' operations. 1605 unzip :: [(Word8,Word8)] -> (ByteString,ByteString) 1606 unzip ls = (pack (P.map fst ls), pack (P.map snd ls)) 1607 {-# INLINE unzip #-} 1608 1609 -- --------------------------------------------------------------------- 1610 -- Special lists 1611 1612 -- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first. 1613 inits :: ByteString -> [ByteString] 1614 inits (PS x s l) = [PS x s n | n <- [0..l]] 1615 1616 -- | /O(n)/ Return all final segments of the given 'ByteString', longest first. 1617 tails :: ByteString -> [ByteString] 1618 tails p | null p = [empty] 1619 | otherwise = p : tails (unsafeTail p) 1620 1621 -- less efficent spacewise: tails (PS x s l) = [PS x (s+n) (l-n) | n <- [0..l]] 1622 1623 -- --------------------------------------------------------------------- 1624 -- ** Ordered 'ByteString's 1625 1626 -- | /O(n)/ Sort a ByteString efficiently, using counting sort. 1627 sort :: ByteString -> ByteString 1628 sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do 1629 1630 memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize))) 1631 withForeignPtr input (\x -> countOccurrences arr (x `plusPtr` s) l) 1632 1633 let STRICT2(go) 1634 go 256 _ = return () 1635 go i ptr = do n <- peekElemOff arr i 1636 when (n /= 0) $ memset ptr (fromIntegral i) n >> return () 1637 go (i + 1) (ptr `plusPtr` (fromIntegral n)) 1638 go 0 p 1639 where 1640 -- | Count the number of occurrences of each byte. 1641 -- Used by 'sort' 1642 -- 1643 countOccurrences :: Ptr CSize -> Ptr Word8 -> Int -> IO () 1644 STRICT3(countOccurrences) 1645 countOccurrences counts str len = go 0 1646 where 1647 STRICT1(go) 1648 go i | i == len = return () 1649 | otherwise = do k <- fromIntegral `fmap` peekElemOff str i 1650 x <- peekElemOff counts k 1651 pokeElemOff counts k (x + 1) 1652 go (i + 1) 1653 1654 {- 1655 sort :: ByteString -> ByteString 1656 sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do 1657 memcpy p (f `plusPtr` s) l 1658 c_qsort p l -- inplace 1659 -} 1660 1661 -- The 'sortBy' function is the non-overloaded version of 'sort'. 1662 -- 1663 -- Try some linear sorts: radix, counting 1664 -- Or mergesort. 1665 -- 1666 -- sortBy :: (Word8 -> Word8 -> Ordering) -> ByteString -> ByteString 1667 -- sortBy f ps = undefined 1668 1669 -- --------------------------------------------------------------------- 1670 -- Low level constructors 1671 1672 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a 1673 -- null-terminated @CString@. The @CString@ will be freed 1674 -- automatically. This is a memcpy(3). 1675 useAsCString :: ByteString -> (CString -> IO a) -> IO a 1676 useAsCString (PS fp o l) action = do 1677 allocaBytes (l+1) $ \buf -> 1678 withForeignPtr fp $ \p -> do 1679 memcpy buf (p `plusPtr` o) (fromIntegral l) 1680 pokeByteOff buf l (0::Word8) 1681 action (castPtr buf) 1682 1683 -- | /O(n) construction/ Use a @ByteString@ with a function requiring a @CStringLen@. 1684 -- As for @useAsCString@ this function makes a copy of the original @ByteString@. 1685 useAsCStringLen :: ByteString -> (CStringLen -> IO a) -> IO a 1686 useAsCStringLen p@(PS _ _ l) f = useAsCString p $ \cstr -> f (cstr,l) 1687 1688 ------------------------------------------------------------------------ 1689 1690 -- | /O(n)./ Construct a new @ByteString@ from a @CString@. The 1691 -- resulting @ByteString@ is an immutable copy of the original 1692 -- @CString@, and is managed on the Haskell heap. The original 1693 -- @CString@ must be null terminated. 1694 packCString :: CString -> IO ByteString 1695 packCString cstr = do 1696 len <- c_strlen cstr 1697 packCStringLen (cstr, fromIntegral len) 1698 1699 -- | /O(n)./ Construct a new @ByteString@ from a @CStringLen@. The 1700 -- resulting @ByteString@ is an immutable copy of the original @CStringLen@. 1701 -- The @ByteString@ is a normal Haskell value and will be managed on the 1702 -- Haskell heap. 1703 packCStringLen :: CStringLen -> IO ByteString 1704 packCStringLen (cstr, len) = create len $ \p -> 1705 memcpy p (castPtr cstr) (fromIntegral len) 1706 1707 ------------------------------------------------------------------------ 1708 1709 -- | /O(n)/ Make a copy of the 'ByteString' with its own storage. 1710 -- This is mainly useful to allow the rest of the data pointed 1711 -- to by the 'ByteString' to be garbage collected, for example 1712 -- if a large string has been read in, and only a small part of it 1713 -- is needed in the rest of the program. 1714 -- 1715 copy :: ByteString -> ByteString 1716 copy (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> 1717 memcpy p (f `plusPtr` s) (fromIntegral l) 1718 1719 -- --------------------------------------------------------------------- 1720 -- Line IO 1721 1722 -- | Read a line from stdin. 1723 getLine :: IO ByteString 1724 getLine = hGetLine stdin 1725 1726 -- | Read a line from a handle 1727 1728 hGetLine :: Handle -> IO ByteString 1729 #if !defined(__GLASGOW_HASKELL__) 1730 hGetLine h = System.IO.hGetLine h >>= return . pack . P.map c2w 1731 #else 1732 hGetLine h = wantReadableHandle "Data.ByteString.hGetLine" h $ \ handle_ -> do 1733 case haBufferMode handle_ of 1734 NoBuffering -> error "no buffering" 1735 _other -> hGetLineBuffered handle_ 1736 1737 where 1738 hGetLineBuffered handle_ = do 1739 let ref = haBuffer handle_ 1740 buf <- readIORef ref 1741 hGetLineBufferedLoop handle_ ref buf 0 [] 1742 1743 hGetLineBufferedLoop handle_ ref 1744 buf@Buffer{ bufRPtr=r, bufWPtr=w, bufBuf=raw } len xss = 1745 len `seq` do 1746 off <- findEOL r w raw 1747 let new_len = len + off - r 1748 xs <- mkPS raw r off 1749 1750 -- if eol == True, then off is the offset of the '\n' 1751 -- otherwise off == w and the buffer is now empty. 1752 if off /= w 1753 then do if (w == off + 1) 1754 then writeIORef ref buf{ bufRPtr=0, bufWPtr=0 } 1755 else writeIORef ref buf{ bufRPtr = off + 1 } 1756 mkBigPS new_len (xs:xss) 1757 else do 1758 maybe_buf <- maybeFillReadBuffer (haFD handle_) True (haIsStream handle_) 1759 buf{ bufWPtr=0, bufRPtr=0 } 1760 case maybe_buf of 1761 -- Nothing indicates we caught an EOF, and we may have a 1762 -- partial line to return. 1763 Nothing -> do 1764 writeIORef ref buf{ bufRPtr=0, bufWPtr=0 } 1765 if new_len > 0 1766 then mkBigPS new_len (xs:xss) 1767 else ioe_EOF 1768 Just new_buf -> 1769 hGetLineBufferedLoop handle_ ref new_buf new_len (xs:xss) 1770 1771 -- find the end-of-line character, if there is one 1772 findEOL r w raw 1773 | r == w = return w 1774 | otherwise = do 1775 (c,r') <- readCharFromBuffer raw r 1776 if c == '\n' 1777 then return r -- NB. not r': don't include the '\n' 1778 else findEOL r' w raw 1779 1780 maybeFillReadBuffer fd is_line is_stream buf = catch 1781 (do buf' <- fillReadBuffer fd is_line is_stream buf 1782 return (Just buf')) 1783 (\e -> if isEOFError e then return Nothing else ioError e) 1784 1785 -- TODO, rewrite to use normal memcpy 1786 mkPS :: RawBuffer -> Int -> Int -> IO ByteString 1787 mkPS buf start end = 1788 let len = end - start 1789 in create len $ \p -> do 1790 memcpy_ptr_baoff p buf (fromIntegral start) (fromIntegral len) 1791 return () 1792 1793 mkBigPS :: Int -> [ByteString] -> IO ByteString 1794 mkBigPS _ [ps] = return ps 1795 mkBigPS _ pss = return $! concat (P.reverse pss) 1796 1797 #endif 1798 1799 -- --------------------------------------------------------------------- 1800 -- Block IO 1801 1802 -- | Outputs a 'ByteString' to the specified 'Handle'. 1803 hPut :: Handle -> ByteString -> IO () 1804 hPut _ (PS _ _ 0) = return () 1805 hPut h (PS ps s l) = withForeignPtr ps $ \p-> hPutBuf h (p `plusPtr` s) l 1806 1807 -- | A synonym for @hPut@, for compatibility 1808 hPutStr :: Handle -> ByteString -> IO () 1809 hPutStr = hPut 1810 1811 -- | Write a ByteString to a handle, appending a newline byte 1812 hPutStrLn :: Handle -> ByteString -> IO () 1813 hPutStrLn h ps 1814 | length ps < 1024 = hPut h (ps `snoc` 0x0a) 1815 | otherwise = hPut h ps >> hPut h (singleton (0x0a)) -- don't copy 1816 1817 -- | Write a ByteString to stdout 1818 putStr :: ByteString -> IO () 1819 putStr = hPut stdout 1820 1821 -- | Write a ByteString to stdout, appending a newline byte 1822 putStrLn :: ByteString -> IO () 1823 putStrLn = hPutStrLn stdout 1824 1825 ------------------------------------------------------------------------ 1826 -- Low level IO 1827 1828 -- | Read a 'ByteString' directly from the specified 'Handle'. This 1829 -- is far more efficient than reading the characters into a 'String' 1830 -- and then using 'pack'. First argument is the Handle to read from, 1831 -- and the second is the number of bytes to read. 1832 -- 1833 hGet :: Handle -> Int -> IO ByteString 1834 hGet _ 0 = return empty 1835 hGet h i = createAndTrim i $ \p -> hGetBuf h p i 1836 1837 -- | hGetNonBlocking is identical to 'hGet', except that it will never block 1838 -- waiting for data to become available, instead it returns only whatever data 1839 -- is available. 1840 -- 1841 hGetNonBlocking :: Handle -> Int -> IO ByteString 1842 #if defined(__GLASGOW_HASKELL__) 1843 hGetNonBlocking _ 0 = return empty 1844 hGetNonBlocking h i = createAndTrim i $ \p -> hGetBufNonBlocking h p i 1845 #else 1846 hGetNonBlocking = hGet 1847 #endif 1848 1849 -- | Read entire handle contents strictly into a 'ByteString'. 1850 -- 1851 -- This function reads chunks at a time, doubling the chunksize on each 1852 -- read. The final buffer is then realloced to the appropriate size. For 1853 -- files > half of available memory, this may lead to memory exhaustion. 1854 -- Consider using 'readFile' in this case. 1855 -- 1856 -- As with 'hGet', the string representation in the file is assumed to 1857 -- be ISO-8859-1. 1858 -- 1859 -- The Handle is closed once the contents have been read, 1860 -- or if an exception is thrown. 1861 -- 1862 hGetContents :: Handle -> IO ByteString 1863 hGetContents h = always (hClose h) $ do -- strict, so hClose 1864 let start_size = 1024 1865 p <- mallocBytes start_size 1866 i <- hGetBuf h p start_size 1867 if i < start_size 1868 then do p' <- reallocBytes p i 1869 fp <- newForeignPtr finalizerFree p' 1870 return $! PS fp 0 i 1871 else f p start_size 1872 where 1873 always = flip finally 1874 f p s = do 1875 let s' = 2 * s 1876 p' <- reallocBytes p s' 1877 i <- hGetBuf h (p' `plusPtr` s) s 1878 if i < s 1879 then do let i' = s + i 1880 p'' <- reallocBytes p' i' 1881 fp <- newForeignPtr finalizerFree p'' 1882 return $! PS fp 0 i' 1883 else f p' s' 1884 1885 -- | getContents. Read stdin strictly. Equivalent to hGetContents stdin 1886 -- The 'Handle' is closed after the contents have been read. 1887 -- 1888 getContents :: IO ByteString 1889 getContents = hGetContents stdin 1890 1891 -- | The interact function takes a function of type @ByteString -> ByteString@ 1892 -- as its argument. The entire input from the standard input device is passed 1893 -- to this function as its argument, and the resulting string is output on the 1894 -- standard output device. 1895 -- 1896 interact :: (ByteString -> ByteString) -> IO () 1897 interact transformer = putStr . transformer =<< getContents 1898 1899 -- | Read an entire file strictly into a 'ByteString'. This is far more 1900 -- efficient than reading the characters into a 'String' and then using 1901 -- 'pack'. It also may be more efficient than opening the file and 1902 -- reading it using hGet. Files are read using 'binary mode' on Windows, 1903 -- for 'text mode' use the Char8 version of this function. 1904 -- 1905 readFile :: FilePath -> IO ByteString 1906 readFile f = bracket (openBinaryFile f ReadMode) hClose 1907 (\h -> hFileSize h >>= hGet h . fromIntegral) 1908 1909 -- | Write a 'ByteString' to a file. 1910 writeFile :: FilePath -> ByteString -> IO () 1911 writeFile f txt = bracket (openBinaryFile f WriteMode) hClose 1912 (\h -> hPut h txt) 1913 1914 -- | Append a 'ByteString' to a file. 1915 appendFile :: FilePath -> ByteString -> IO () 1916 appendFile f txt = bracket (openBinaryFile f AppendMode) hClose 1917 (\h -> hPut h txt) 1918 1919 -- --------------------------------------------------------------------- 1920 -- Internal utilities 1921 1922 -- | 'findIndexOrEnd' is a variant of findIndex, that returns the length 1923 -- of the string if no element is found, rather than Nothing. 1924 findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int 1925 findIndexOrEnd k (PS x s l) = inlinePerformIO $ withForeignPtr x $ \f -> go (f `plusPtr` s) 0 1926 where 1927 STRICT2(go) 1928 go ptr n | n >= l = return l 1929 | otherwise = do w <- peek ptr 1930 if k w 1931 then return n 1932 else go (ptr `plusPtr` 1) (n+1) 1933 {-# INLINE findIndexOrEnd #-} 1934 1935 -- | Perform an operation with a temporary ByteString 1936 withPtr :: ForeignPtr a -> (Ptr a -> IO b) -> b 1937 withPtr fp io = inlinePerformIO (withForeignPtr fp io) 1938 {-# INLINE withPtr #-} 1939 1940 -- Common up near identical calls to `error' to reduce the number 1941 -- constant strings created when compiled: 1942 errorEmptyList :: String -> a 1943 errorEmptyList fun = moduleError fun "empty ByteString" 1944 {-# NOINLINE errorEmptyList #-} 1945 1946 moduleError :: String -> String -> a 1947 moduleError fun msg = error ("Data.ByteString." ++ fun ++ ':':' ':msg) 1948 {-# NOINLINE moduleError #-} 1949 1950 -- Find from the end of the string using predicate 1951 findFromEndUntil :: (Word8 -> Bool) -> ByteString -> Int 1952 STRICT2(findFromEndUntil) 1953 findFromEndUntil f ps@(PS x s l) = 1954 if null ps then 0 1955 else if f (last ps) then l 1956 else findFromEndUntil f (PS x s (l-1))