1 {-# OPTIONS_GHC -fno-warn-orphans #-} 2 {-# LANGUAGE BangPatterns, CPP #-} 3 -- | 4 -- Module : Data.Text.Lazy 5 -- Copyright : (c) 2009, 2010 Bryan O'Sullivan 6 -- 7 -- License : BSD-style 8 -- Maintainer : bos@serpentine.com, rtomharper@googlemail.com, 9 -- duncan@haskell.org 10 -- Stability : experimental 11 -- Portability : GHC 12 -- 13 -- A time and space-efficient implementation of Unicode text using 14 -- lists of packed arrays. This representation is suitable for high 15 -- performance use and for streaming large quantities of data. It 16 -- provides a means to manipulate a large body of text without 17 -- requiring that the entire content be resident in memory. 18 -- 19 -- Some operations, such as 'concat', 'append', 'reverse' and 'cons', 20 -- have better complexity than their "Data.Text" equivalents, due to 21 -- optimisations resulting from the list spine structure. And for 22 -- other operations lazy 'Text's are usually within a few percent of 23 -- strict ones, but with better heap usage. For data larger than 24 -- available memory, or if you have tight memory constraints, this 25 -- module will be the only option. 26 -- 27 -- This module is intended to be imported @qualified@, to avoid name 28 -- clashes with "Prelude" functions. eg. 29 -- 30 -- > import qualified Data.Text.Lazy as B 31 32 module Data.Text.Lazy 33 ( 34 Text 35 -- * Creation and elimination 36 , pack 37 , unpack 38 , singleton 39 , empty 40 , fromChunks 41 , toChunks 42 , toStrict 43 , fromStrict 44 , foldrChunks 45 , foldlChunks 46 47 -- * Basic interface 48 , cons 49 , snoc 50 , append 51 , uncons 52 , head 53 , last 54 , tail 55 , init 56 , null 57 , length 58 , compareLength 59 60 -- * Transformations 61 , map 62 , intercalate 63 , intersperse 64 , transpose 65 , reverse 66 , replace 67 68 -- ** Case conversion 69 -- $case 70 , toCaseFold 71 , toLower 72 , toUpper 73 74 -- ** Justification 75 , justifyLeft 76 , justifyRight 77 , center 78 79 -- * Folds 80 , foldl 81 , foldl' 82 , foldl1 83 , foldl1' 84 , foldr 85 , foldr1 86 87 -- ** Special folds 88 , concat 89 , concatMap 90 , any 91 , all 92 , maximum 93 , minimum 94 95 -- * Construction 96 97 -- ** Scans 98 , scanl 99 , scanl1 100 , scanr 101 , scanr1 102 103 -- ** Accumulating maps 104 , mapAccumL 105 , mapAccumR 106 107 -- ** Generation and unfolding 108 , replicate 109 , unfoldr 110 , unfoldrN 111 112 -- * Substrings 113 114 -- ** Breaking strings 115 , take 116 , drop 117 , takeWhile 118 , dropWhile 119 , dropWhileEnd 120 , dropAround 121 , strip 122 , stripStart 123 , stripEnd 124 , splitAt 125 , spanBy 126 , break 127 , breakEnd 128 , breakBy 129 , group 130 , groupBy 131 , inits 132 , tails 133 134 -- ** Breaking into many substrings 135 -- $split 136 , split 137 , splitBy 138 , chunksOf 139 -- , breakSubstring 140 141 -- ** Breaking into lines and words 142 , lines 143 , words 144 , unlines 145 , unwords 146 147 -- * Predicates 148 , isPrefixOf 149 , isSuffixOf 150 , isInfixOf 151 152 -- ** View patterns 153 , prefixed 154 , suffixed 155 156 -- * Searching 157 , filter 158 , find 159 , findBy 160 , partitionBy 161 162 -- , findSubstring 163 164 -- * Indexing 165 , index 166 , count 167 168 -- * Zipping and unzipping 169 , zip 170 , zipWith 171 172 -- -* Ordered text 173 -- , sort 174 ) where 175 176 import Prelude (Char, Bool(..), Maybe(..), String, 177 Eq(..), Ord(..), Ordering, Read(..), Show(..), 178 (&&), (+), (-), (.), ($), (++), 179 div, error, flip, fromIntegral, not, otherwise) 180 import qualified Prelude as P 181 #if defined(HAVE_DEEPSEQ) 182 import Control.DeepSeq (NFData(..)) 183 #endif 184 import Data.Int (Int64) 185 import qualified Data.List as L 186 import Data.Char (isSpace) 187 import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf)) 188 #if __GLASGOW_HASKELL__ >= 612 189 import Data.Data (mkNoRepType) 190 #else 191 import Data.Data (mkNorepType) 192 #endif 193 import Data.Monoid (Monoid(..)) 194 import Data.String (IsString(..)) 195 import qualified Data.Text as T 196 import qualified Data.Text.Internal as T 197 import qualified Data.Text.Fusion.Common as S 198 import qualified Data.Text.Unsafe as T 199 import qualified Data.Text.Lazy.Fusion as S 200 import Data.Text.Fusion.Internal (PairS(..)) 201 import Data.Text.Lazy.Fusion (stream, unstream) 202 import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldlChunks, foldrChunks) 203 import Data.Text.Internal (textP) 204 import Data.Text.Lazy.Search (indices) 205 206 instance Eq Text where 207 -- entered 4395 timest1 == t2 = stream t1 == stream t2 208 {-# INLINE (==) #-} 209 210 instance Ord Text where 211 -- entered 100 timescompare t1 t2 = compare (stream t1) (stream t2) 212 {-# INLINE compare #-} 213 214 instance Show Text where 215 -- entered 100 timesshowsPrec p ps r = showsPrec p (unpack ps) r 216 217 instance Read Text where 218 -- entered 100 timesreadsPrec p str = [(pack x,y) | (x,y) <- readsPrec p str] 219 220 instance Monoid Text where 221 -- never enteredmempty = empty 222 -- entered 100 timesmappend = append 223 -- entered 100 timesmconcat = concat 224 225 instance IsString Text where 226 -- entered 401 timesfromString = pack 227 228 #if defined(HAVE_DEEPSEQ) 229 instance NFData Text where 230 -- entered 200 timesrnf Empty = () 231 rnf (Chunk _ ts) = rnf ts 232 #endif 233 234 instance Data Text where 235 -- never enteredgfoldl f z txt = z pack `f` (unpack txt) 236 -- never enteredtoConstr _ = error "Data.Text.Lazy.Text.toConstr" 237 -- never enteredgunfold _ _ = error "Data.Text.Lazy.Text.gunfold" 238 #if __GLASGOW_HASKELL__ >= 612 239 -- never entereddataTypeOf _ = mkNoRepType "Data.Text.Lazy.Text" 240 #else 241 dataTypeOf _ = mkNorepType "Data.Text.Lazy.Text" 242 #endif 243 244 -- | /O(n)/ Convert a 'String' into a 'Text'. 245 -- 246 -- This function is subject to array fusion. 247 pack :: String -> Text 248 -- entered 25,660 timespack s = unstream (S.streamList s) 249 {-# INLINE [1] pack #-} 250 251 -- | /O(n)/ Convert a 'Text' into a 'String'. 252 -- Subject to array fusion. 253 unpack :: Text -> String 254 -- entered 52,846 timesunpack t = S.unstreamList (stream t) 255 {-# INLINE [1] unpack #-} 256 257 -- | /O(1)/ Convert a character into a Text. 258 -- Subject to fusion. 259 singleton :: Char -> Text 260 -- entered 5234 timessingleton c = Chunk (T.singleton c) Empty 261 {-# INLINE [1] singleton #-} 262 263 {-# RULES 264 "LAZY TEXT singleton -> fused" [~1] forall c. 265 singleton c = unstream (S.singleton c) 266 "LAZY TEXT singleton -> unfused" [1] forall c. 267 unstream (S.singleton c) = singleton c 268 #-} 269 270 -- | /O(c)/ Convert a list of strict 'T.Text's into a lazy 'Text'. 271 fromChunks :: [T.Text] -> Text 272 -- entered 7740 timesfromChunks cs = L.foldr chunk Empty cs 273 274 -- | /O(n)/ Convert a lazy 'Text' into a list of strict 'T.Text's. 275 toChunks :: Text -> [T.Text] 276 -- entered 647 timestoChunks cs = foldrChunks (:) [] cs 277 278 -- | /O(n)/ Convert a lazy 'Text' into a strict 'T.Text'. 279 toStrict :: Text -> T.Text 280 -- entered 100 timestoStrict t = T.concat (toChunks t) 281 {-# INLINE [1] toStrict #-} 282 283 -- | /O(c)/ Convert a strict 'T.Text' into a lazy 'Text'. 284 fromStrict :: T.Text -> Text 285 -- entered 2664 timesfromStrict t = chunk t Empty 286 {-# INLINE [1] fromStrict #-} 287 288 -- ----------------------------------------------------------------------------- 289 -- * Basic functions 290 291 -- | /O(n)/ Adds a character to the front of a 'Text'. This function 292 -- is more costly than its 'List' counterpart because it requires 293 -- copying a new array. Subject to fusion. 294 cons :: Char -> Text -> Text 295 -- entered 400 timescons c t = Chunk (T.singleton c) t 296 {-# INLINE [1] cons #-} 297 298 {-# RULES 299 "LAZY TEXT cons -> fused" [~1] forall c t. 300 cons c t = unstream (S.cons c (stream t)) 301 "LAZY TEXT cons -> unfused" [1] forall c t. 302 unstream (S.cons c (stream t)) = cons c t 303 #-} 304 305 -- | /O(n)/ Adds a character to the end of a 'Text'. This copies the 306 -- entire array in the process, unless fused. Subject to fusion. 307 snoc :: Text -> Char -> Text 308 -- entered 5133 timessnoc t c = foldrChunks Chunk (singleton c) t 309 {-# INLINE [1] snoc #-} 310 311 {-# RULES 312 "LAZY TEXT snoc -> fused" [~1] forall t c. 313 snoc t c = unstream (S.snoc (stream t) c) 314 "LAZY TEXT snoc -> unfused" [1] forall t c. 315 unstream (S.snoc (stream t) c) = snoc t c 316 #-} 317 318 -- | /O(n\/c)/ Appends one 'Text' to another. Subject to array 319 -- fusion. 320 append :: Text -> Text -> Text 321 -- entered 1826 timesappend xs ys = foldrChunks Chunk ys xs 322 {-# INLINE [1] append #-} 323 324 {-# RULES 325 "LAZY TEXT append -> fused" [~1] forall t1 t2. 326 append t1 t2 = unstream (S.append (stream t1) (stream t2)) 327 "LAZY TEXT append -> unfused" [1] forall t1 t2. 328 unstream (S.append (stream t1) (stream t2)) = append t1 t2 329 #-} 330 331 -- | /O(1)/ Returns the first character and rest of a 'Text', or 332 -- 'Nothing' if empty. Subject to array fusion. 333 uncons :: Text -> Maybe (Char, Text) 334 -- entered 17,434 timesuncons Empty = Nothing 335 uncons (Chunk t ts) = 336 Just (T.unsafeHead t, 337 if T.length t == 1 then ts else Chunk (T.unsafeTail t) ts) 338 {-# INLINE uncons #-} 339 340 -- | /O(1)/ Returns the first character of a 'Text', which must be 341 -- non-empty. Subject to array fusion. 342 head :: Text -> Char 343 -- entered 417 timeshead t = S.head (stream t) 344 {-# INLINE head #-} 345 346 -- | /O(1)/ Returns all characters after the head of a 'Text', which 347 -- must be non-empty. Subject to array fusion. 348 tail :: Text -> Text 349 -- entered 400 timestail (Chunk t ts) = chunk (T.tail t) ts 350 tail Empty = emptyError "tail" 351 {-# INLINE [1] tail #-} 352 353 {-# RULES 354 "LAZY TEXT tail -> fused" [~1] forall t. 355 tail t = unstream (S.tail (stream t)) 356 "LAZY TEXT tail -> unfused" [1] forall t. 357 unstream (S.tail (stream t)) = tail t 358 #-} 359 360 -- | /O(1)/ Returns all but the last character of a 'Text', which must 361 -- be non-empty. Subject to array fusion. 362 init :: Text -> Text 363 -- entered 400 timesinit (Chunk t0 ts0) = go t0 ts0 364 where go t (Chunk t' ts) = Chunk t (go t' ts) 365 go t Empty = chunk (T.init t) Empty 366 init Empty = emptyError "init" 367 {-# INLINE [1] init #-} 368 369 {-# RULES 370 "LAZY TEXT init -> fused" [~1] forall t. 371 init t = unstream (S.init (stream t)) 372 "LAZY TEXT init -> unfused" [1] forall t. 373 unstream (S.init (stream t)) = init t 374 #-} 375 376 -- | /O(1)/ Tests whether a 'Text' is empty or not. Subject to array 377 -- fusion. 378 null :: Text -> Bool 379 -- entered 4144 timesnull Empty = True 380 null _ = False 381 {-# INLINE [1] null #-} 382 383 {-# RULES 384 "LAZY TEXT null -> fused" [~1] forall t. 385 null t = S.null (stream t) 386 "LAZY TEXT null -> unfused" [1] forall t. 387 S.null (stream t) = null t 388 #-} 389 390 -- | /O(1)/ Tests whether a 'Text' contains exactly one character. 391 -- Subject to fusion. 392 isSingleton :: Text -> Bool 393 -- entered 1052 timesisSingleton = S.isSingleton . stream 394 {-# INLINE isSingleton #-} 395 396 -- | /O(1)/ Returns the last character of a 'Text', which must be 397 -- non-empty. Subject to array fusion. 398 last :: Text -> Char 399 -- entered 779 timeslast Empty = emptyError "last" 400 last (Chunk t ts) = go t ts 401 where go _ (Chunk t' ts') = go t' ts' 402 go t' Empty = T.last t' 403 {-# INLINE [1] last #-} 404 405 {-# RULES 406 "LAZY TEXT last -> fused" [~1] forall t. 407 last t = S.last (stream t) 408 "LAZY TEXT last -> unfused" [1] forall t. 409 S.last (stream t) = last t 410 #-} 411 412 -- | /O(n)/ Returns the number of characters in a 'Text'. 413 -- Subject to fusion. 414 length :: Text -> Int64 415 -- entered 1801 timeslength = foldlChunks go 0 416 where go l t = l + fromIntegral (T.length t) 417 {-# INLINE [1] length #-} 418 419 {-# RULES 420 "LAZY TEXT length -> fused" [~1] forall t. 421 length t = S.length (stream t) 422 "LAZY TEXT length -> unfused" [1] forall t. 423 S.length (stream t) = length t 424 #-} 425 426 -- | /O(n)/ Compare the count of characters in a 'Text' to a number. 427 -- Subject to fusion. 428 -- 429 -- This function gives the same answer as comparing against the result 430 -- of 'length', but can short circuit if the count of characters is 431 -- greater than the number, and hence be more efficient. 432 compareLength :: Text -> Int64 -> Ordering 433 -- entered 100 timescompareLength t n = S.compareLengthI (stream t) n 434 {-# INLINE [1] compareLength #-} 435 436 -- We don't apply those otherwise appealing length-to-compareLength 437 -- rewrite rules here, because they can change the strictness 438 -- properties of code. 439 440 -- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to 441 -- each element of @t@. Subject to array fusion. 442 map :: (Char -> Char) -> Text -> Text 443 -- entered 500 timesmap f t = unstream (S.map f (stream t)) 444 {-# INLINE [1] map #-} 445 446 -- | /O(n)/ The 'intercalate' function takes a 'Text' and a list of 447 -- 'Text's and concatenates the list after interspersing the first 448 -- argument between each element of the list. 449 intercalate :: Text -> [Text] -> Text 450 -- entered 600 timesintercalate t = concat . (L.intersperse t) 451 {-# INLINE intercalate #-} 452 453 -- | /O(n)/ The 'intersperse' function takes a character and places it 454 -- between the characters of a 'Text'. Subject to array fusion. 455 intersperse :: Char -> Text -> Text 456 -- entered 400 timesintersperse c t = unstream (S.intersperse c (stream t)) 457 {-# INLINE intersperse #-} 458 459 -- | /O(n)/ Left-justify a string to the given length, using the 460 -- specified fill character on the right. Subject to fusion. Examples: 461 -- 462 -- > justifyLeft 7 'x' "foo" == "fooxxxx" 463 -- > justifyLeft 3 'x' "foobar" == "foobar" 464 justifyLeft :: Int64 -> Char -> Text -> Text 465 -- entered 400 timesjustifyLeft k c t 466 | len >= k = t 467 | otherwise = t `append` replicateChar (k-len) c 468 where len = length t 469 {-# INLINE [1] justifyLeft #-} 470 471 {-# RULES 472 "LAZY TEXT justifyLeft -> fused" [~1] forall k c t. 473 justifyLeft k c t = unstream (S.justifyLeftI k c (stream t)) 474 "LAZY TEXT justifyLeft -> unfused" [1] forall k c t. 475 unstream (S.justifyLeftI k c (stream t)) = justifyLeft k c t 476 #-} 477 478 -- | /O(n)/ Right-justify a string to the given length, using the 479 -- specified fill character on the left. Examples: 480 -- 481 -- > justifyRight 7 'x' "bar" == "xxxxbar" 482 -- > justifyRight 3 'x' "foobar" == "foobar" 483 justifyRight :: Int64 -> Char -> Text -> Text 484 -- entered 400 timesjustifyRight k c t 485 | len >= k = t 486 | otherwise = replicateChar (k-len) c `append` t 487 where len = length t 488 {-# INLINE justifyRight #-} 489 490 -- | /O(n)/ Center a string to the given length, using the 491 -- specified fill character on either side. Examples: 492 -- 493 -- > center 8 'x' "HS" = "xxxHSxxx" 494 center :: Int64 -> Char -> Text -> Text 495 -- entered 400 timescenter k c t 496 | len >= k = t 497 | otherwise = replicateChar l c `append` t `append` replicateChar r c 498 where len = length t 499 d = k - len 500 r = d `div` 2 501 l = d - r 502 {-# INLINE center #-} 503 504 -- | /O(n)/ The 'transpose' function transposes the rows and columns 505 -- of its 'Text' argument. Note that this function uses 'pack', 506 -- 'unpack', and the list version of transpose, and is thus not very 507 -- efficient. 508 transpose :: [Text] -> [Text] 509 -- entered 100 timestranspose ts = L.map (\ss -> Chunk (T.pack ss) Empty) 510 (L.transpose (L.map unpack ts)) 511 -- TODO: make this fast 512 513 -- | /O(n)/ 'reverse' @t@ returns the elements of @t@ in reverse order. 514 reverse :: Text -> Text 515 -- entered 2858 timesreverse = rev Empty 516 where rev a Empty = a 517 rev a (Chunk t ts) = rev (Chunk (T.reverse t) a) ts 518 519 -- | /O(m+n)/ Replace every occurrence of one substring with another. 520 replace :: Text -- ^ Text to search for 521 -> Text -- ^ Replacement text 522 -> Text -- ^ Input text 523 -> Text 524 -- entered 100 timesreplace s d = intercalate d . split s 525 {-# INLINE replace #-} 526 527 -- ---------------------------------------------------------------------------- 528 -- ** Case conversions (folds) 529 530 -- $case 531 -- 532 -- With Unicode text, it is incorrect to use combinators like @map 533 -- toUpper@ to case convert each character of a string individually. 534 -- Instead, use the whole-string case conversion functions from this 535 -- module. For correctness in different writing systems, these 536 -- functions may map one input character to two or three output 537 -- characters. 538 539 -- | /O(n)/ Convert a string to folded case. This function is mainly 540 -- useful for performing caseless (or case insensitive) string 541 -- comparisons. 542 -- 543 -- A string @x@ is a caseless match for a string @y@ if and only if: 544 -- 545 -- @toCaseFold x == toCaseFold y@ 546 -- 547 -- The result string may be longer than the input string, and may 548 -- differ from applying 'toLower' to the input string. For instance, 549 -- the Armenian small ligature men now (U+FB13) is case folded to the 550 -- bigram men now (U+0574 U+0576), while the micro sign (U+00B5) is 551 -- case folded to the Greek small letter letter mu (U+03BC) instead of 552 -- itself. 553 toCaseFold :: Text -> Text 554 -- entered 100 timestoCaseFold t = unstream (S.toCaseFold (stream t)) 555 {-# INLINE [0] toCaseFold #-} 556 557 -- | /O(n)/ Convert a string to lower case, using simple case 558 -- conversion. The result string may be longer than the input string. 559 -- For instance, the Latin capital letter I with dot above (U+0130) 560 -- maps to the sequence Latin small letter i (U+0069) followed by 561 -- combining dot above (U+0307). 562 toLower :: Text -> Text 563 -- entered 100 timestoLower t = unstream (S.toLower (stream t)) 564 {-# INLINE toLower #-} 565 566 -- | /O(n)/ Convert a string to upper case, using simple case 567 -- conversion. The result string may be longer than the input string. 568 -- For instance, the German eszett (U+00DF) maps to the two-letter 569 -- sequence SS. 570 toUpper :: Text -> Text 571 -- entered 100 timestoUpper t = unstream (S.toUpper (stream t)) 572 {-# INLINE toUpper #-} 573 574 -- | /O(n)/ 'foldl', applied to a binary operator, a starting value 575 -- (typically the left-identity of the operator), and a 'Text', 576 -- reduces the 'Text' using the binary operator, from left to right. 577 -- Subject to array fusion. 578 foldl :: (b -> Char -> b) -> b -> Text -> b 579 -- entered 400 timesfoldl f z t = S.foldl f z (stream t) 580 {-# INLINE foldl #-} 581 582 -- | /O(n)/ A strict version of 'foldl'. 583 -- Subject to array fusion. 584 foldl' :: (b -> Char -> b) -> b -> Text -> b 585 -- entered 400 timesfoldl' f z t = S.foldl' f z (stream t) 586 {-# INLINE foldl' #-} 587 588 -- | /O(n)/ A variant of 'foldl' that has no starting value argument, 589 -- and thus must be applied to a non-empty 'Text'. Subject to array 590 -- fusion. 591 foldl1 :: (Char -> Char -> Char) -> Text -> Char 592 -- entered 400 timesfoldl1 f t = S.foldl1 f (stream t) 593 {-# INLINE foldl1 #-} 594 595 -- | /O(n)/ A strict version of 'foldl1'. 596 -- Subject to array fusion. 597 foldl1' :: (Char -> Char -> Char) -> Text -> Char 598 -- entered 400 timesfoldl1' f t = S.foldl1' f (stream t) 599 {-# INLINE foldl1' #-} 600 601 -- | /O(n)/ 'foldr', applied to a binary operator, a starting value 602 -- (typically the right-identity of the operator), and a 'Text', 603 -- reduces the 'Text' using the binary operator, from right to left. 604 -- Subject to array fusion. 605 foldr :: (Char -> b -> b) -> b -> Text -> b 606 -- entered 800 timesfoldr f z t = S.foldr f z (stream t) 607 {-# INLINE foldr #-} 608 609 -- | /O(n)/ A variant of 'foldr' that has no starting value argument, and 610 -- thust must be applied to a non-empty 'Text'. Subject to array 611 -- fusion. 612 foldr1 :: (Char -> Char -> Char) -> Text -> Char 613 -- entered 400 timesfoldr1 f t = S.foldr1 f (stream t) 614 {-# INLINE foldr1 #-} 615 616 -- | /O(n)/ Concatenate a list of 'Text's. 617 concat :: [Text] -> Text 618 -- entered 1701 timesconcat = to 619 where 620 go Empty css = to css 621 go (Chunk c cs) css = Chunk c (go cs css) 622 to [] = Empty 623 to (cs:css) = go cs css 624 {-# INLINE concat #-} 625 626 -- | /O(n)/ Map a function over a 'Text' that results in a 'Text', and 627 -- concatenate the results. 628 concatMap :: (Char -> Text) -> Text -> Text 629 -- entered 100 timesconcatMap f = concat . foldr ((:) . f) [] 630 {-# INLINE concatMap #-} 631 632 -- | /O(n)/ 'any' @p@ @t@ determines whether any character in the 633 -- 'Text' @t@ satisifes the predicate @p@. Subject to array fusion. 634 any :: (Char -> Bool) -> Text -> Bool 635 -- entered 400 timesany p t = S.any p (stream t) 636 {-# INLINE any #-} 637 638 -- | /O(n)/ 'all' @p@ @t@ determines whether all characters in the 639 -- 'Text' @t@ satisify the predicate @p@. Subject to array fusion. 640 all :: (Char -> Bool) -> Text -> Bool 641 -- entered 400 timesall p t = S.all p (stream t) 642 {-# INLINE all #-} 643 644 -- | /O(n)/ 'maximum' returns the maximum value from a 'Text', which 645 -- must be non-empty. Subject to array fusion. 646 maximum :: Text -> Char 647 -- entered 400 timesmaximum t = S.maximum (stream t) 648 {-# INLINE maximum #-} 649 650 -- | /O(n)/ 'minimum' returns the minimum value from a 'Text', which 651 -- must be non-empty. Subject to array fusion. 652 minimum :: Text -> Char 653 -- entered 400 timesminimum t = S.minimum (stream t) 654 {-# INLINE minimum #-} 655 656 -- | /O(n)/ 'scanl' is similar to 'foldl', but returns a list of 657 -- successive reduced values from the left. This function is subject 658 -- to array fusion. 659 -- 660 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] 661 -- 662 -- Note that 663 -- 664 -- > last (scanl f z xs) == foldl f z xs. 665 scanl :: (Char -> Char -> Char) -> Char -> Text -> Text 666 -- entered 1545 timesscanl f z t = unstream (S.scanl f z (stream t)) 667 {-# INLINE scanl #-} 668 669 -- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting 670 -- value argument. This function is subject to array fusion. 671 -- 672 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] 673 scanl1 :: (Char -> Char -> Char) -> Text -> Text 674 -- entered 400 timesscanl1 f t0 = case uncons t0 of 675 Nothing -> empty 676 Just (t,ts) -> scanl f t ts 677 {-# INLINE scanl1 #-} 678 679 -- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'. 680 -- 681 -- > scanr f v == reverse . scanl (flip f) v . reverse 682 scanr :: (Char -> Char -> Char) -> Char -> Text -> Text 683 -- entered 479 timesscanr f v = reverse . scanl (flip f) v . reverse 684 685 -- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting 686 -- value argument. 687 scanr1 :: (Char -> Char -> Char) -> Text -> Text 688 -- entered 400 timesscanr1 f t | null t = empty 689 | otherwise = scanr f (last t) (init t) 690 691 -- | /O(n)/ Like a combination of 'map' and 'foldl'. Applies a 692 -- function to each element of a 'Text', passing an accumulating 693 -- parameter from left to right, and returns a final 'Text'. 694 mapAccumL :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text) 695 -- entered 8692 timesmapAccumL f s t = case uncons t of 696 Nothing -> (s, empty) 697 Just (x, xs) -> (s'', cons y ys) 698 where (s', y ) = f s x 699 (s'',ys) = mapAccumL f s' xs 700 701 -- | The 'mapAccumR' function behaves like a combination of 'map' and 702 -- 'foldr'; it applies a function to each element of a 'Text', passing 703 -- an accumulating parameter from right to left, and returning a final 704 -- value of this accumulator together with the new 'Text'. 705 mapAccumR :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text) 706 -- entered 7942 timesmapAccumR f s t = case uncons t of 707 Nothing -> (s, empty) 708 Just (x, xs) -> (s'', cons y ys) 709 where (s'',y ) = f s' x 710 (s', ys) = mapAccumR f s xs 711 712 -- | /O(n*m)/ 'replicate' @n@ @t@ is a 'Text' consisting of the input 713 -- @t@ repeated @n@ times. 714 replicate :: Int64 -> Text -> Text 715 -- entered 100 timesreplicate n t = concat (rep 0) 716 where rep i | i >= n = [] 717 | otherwise = t : rep (i+1) 718 {-# INLINE replicate #-} 719 720 -- | /O(n)/ 'replicateChar' @n@ @c@ is a 'Text' of length @n@ with @c@ the 721 -- value of every element. Subject to fusion. 722 replicateChar :: Int64 -> Char -> Text 723 -- entered 1135 timesreplicateChar n c = unstream (S.replicateCharI n c) 724 {-# INLINE replicateChar #-} 725 726 {-# RULES 727 "LAZY TEXT replicate/singleton -> replicateChar" [~1] forall n c. 728 replicate n (singleton c) = replicateChar n c 729 #-} 730 731 -- | /O(n)/, where @n@ is the length of the result. The 'unfoldr' 732 -- function is analogous to the List 'L.unfoldr'. 'unfoldr' builds a 733 -- 'Text' from a seed value. The function takes the element and 734 -- returns 'Nothing' if it is done producing the 'Text', otherwise 735 -- 'Just' @(a,b)@. In this case, @a@ is the next 'Char' in the 736 -- string, and @b@ is the seed value for further production. 737 unfoldr :: (a -> Maybe (Char,a)) -> a -> Text 738 -- entered 100 timesunfoldr f s = unstream (S.unfoldr f s) 739 {-# INLINE unfoldr #-} 740 741 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Text' from a seed 742 -- value. However, the length of the result should be limited by the 743 -- first argument to 'unfoldrN'. This function is more efficient than 744 -- 'unfoldr' when the maximum length of the result is known and 745 -- correct, otherwise its performance is similar to 'unfoldr'. 746 unfoldrN :: Int64 -> (a -> Maybe (Char,a)) -> a -> Text 747 -- entered 100 timesunfoldrN n f s = unstream (S.unfoldrN n f s) 748 {-# INLINE unfoldrN #-} 749 750 -- | /O(n)/ 'take' @n@, applied to a 'Text', returns the prefix of the 751 -- 'Text' of length @n@, or the 'Text' itself if @n@ is greater than 752 -- the length of the Text. Subject to fusion. 753 take :: Int64 -> Text -> Text 754 -- entered 400 timestake i _ | i <= 0 = Empty 755 take i t0 = take' i t0 756 where take' 0 _ = Empty 757 take' _ Empty = Empty 758 take' n (Chunk t ts) 759 | n < len = Chunk (T.take (fromIntegral n) t) Empty 760 | otherwise = Chunk t (take' (n - len) ts) 761 where len = fromIntegral (T.length t) 762 {-# INLINE [1] take #-} 763 764 {-# RULES 765 "LAZY TEXT take -> fused" [~1] forall n t. 766 take n t = unstream (S.take n (stream t)) 767 "LAZY TEXT take -> unfused" [1] forall n t. 768 unstream (S.take n (stream t)) = take n t 769 #-} 770 771 -- | /O(n)/ 'drop' @n@, applied to a 'Text', returns the suffix of the 772 -- 'Text' after the first @n@ characters, or the empty 'Text' if @n@ 773 -- is greater than the length of the 'Text'. Subject to fusion. 774 drop :: Int64 -> Text -> Text 775 -- entered 400 timesdrop i t0 776 | i <= 0 = t0 777 | otherwise = drop' i t0 778 where drop' 0 ts = ts 779 drop' _ Empty = Empty 780 drop' n (Chunk t ts) 781 | n < len = Chunk (T.drop (fromIntegral n) t) ts 782 | otherwise = drop' (n - len) ts 783 where len = fromIntegral (T.length t) 784 {-# INLINE [1] drop #-} 785 786 {-# RULES 787 "LAZY TEXT drop -> fused" [~1] forall n t. 788 drop n t = unstream (S.drop n (stream t)) 789 "LAZY TEXT drop -> unfused" [1] forall n t. 790 unstream (S.drop n (stream t)) = drop n t 791 #-} 792 793 -- | /O(n)/ 'dropWords' @n@ returns the suffix with @n@ 'Word16' 794 -- values dropped, or the empty 'Text' if @n@ is greater than the 795 -- number of 'Word16' values present. 796 dropWords :: Int64 -> Text -> Text 797 -- entered 2180 timesdropWords i t0 798 | i <= 0 = t0 799 | otherwise = drop' i t0 800 where drop' 0 ts = ts 801 drop' _ Empty = Empty 802 drop' n (Chunk (T.Text arr off len) ts) 803 | n < len' = chunk (textP arr (off+n') (len-n')) ts 804 | otherwise = drop' (n - len') ts 805 where len' = fromIntegral len 806 n' = fromIntegral n 807 808 -- | /O(n)/ 'takeWhile', applied to a predicate @p@ and a 'Text', returns 809 -- the longest prefix (possibly empty) of elements that satisfy @p@. 810 -- This function is subject to array fusion. 811 takeWhile :: (Char -> Bool) -> Text -> Text 812 -- entered 400 timestakeWhile p t0 = takeWhile' t0 813 where takeWhile' Empty = Empty 814 takeWhile' (Chunk t ts) = 815 case T.findIndex (not . p) t of 816 Just n | n > 0 -> Chunk (T.take n t) Empty 817 | otherwise -> Empty 818 Nothing -> Chunk t (takeWhile' ts) 819 {-# INLINE [1] takeWhile #-} 820 821 {-# RULES 822 "LAZY TEXT takeWhile -> fused" [~1] forall p t. 823 takeWhile p t = unstream (S.takeWhile p (stream t)) 824 "LAZY TEXT takeWhile -> unfused" [1] forall p t. 825 unstream (S.takeWhile p (stream t)) = takeWhile p t 826 #-} 827 828 -- | /O(n)/ 'dropWhile' @p@ @t@ returns the suffix remaining after 829 -- 'takeWhile' @p@ @t@. This function is subject to array fusion. 830 dropWhile :: (Char -> Bool) -> Text -> Text 831 -- entered 800 timesdropWhile p t0 = dropWhile' t0 832 where dropWhile' Empty = Empty 833 dropWhile' (Chunk t ts) = 834 case T.findIndex (not . p) t of 835 Just n -> Chunk (T.drop n t) ts 836 Nothing -> dropWhile' ts 837 {-# INLINE [1] dropWhile #-} 838 839 {-# RULES 840 "LAZY TEXT dropWhile -> fused" [~1] forall p t. 841 dropWhile p t = unstream (S.dropWhile p (stream t)) 842 "LAZY TEXT dropWhile -> unfused" [1] forall p t. 843 unstream (S.dropWhile p (stream t)) = dropWhile p t 844 #-} 845 -- | /O(n)/ 'dropWhileEnd' @p@ @t@ returns the prefix remaining after 846 -- dropping characters that fail the predicate @p@ from the end of 847 -- @t@. 848 -- Examples: 849 -- 850 -- > dropWhileEnd (=='.') "foo..." == "foo" 851 dropWhileEnd :: (Char -> Bool) -> Text -> Text 852 -- entered 1200 timesdropWhileEnd p = go 853 where go Empty = Empty 854 go (Chunk t Empty) = if T.null t' 855 then Empty 856 else Chunk t' Empty 857 where t' = T.dropWhileEnd p t 858 go (Chunk t ts) = case go ts of 859 Empty -> go (Chunk t Empty) 860 ts' -> Chunk t ts' 861 {-# INLINE dropWhileEnd #-} 862 863 -- | /O(n)/ 'dropAround' @p@ @t@ returns the substring remaining after 864 -- dropping characters that fail the predicate @p@ from both the 865 -- beginning and end of @t@. Subject to fusion. 866 dropAround :: (Char -> Bool) -> Text -> Text 867 -- entered 102 timesdropAround p = dropWhile p . dropWhileEnd p 868 {-# INLINE [1] dropAround #-} 869 870 -- | /O(n)/ Remove leading white space from a string. Equivalent to: 871 -- 872 -- > dropWhile isSpace 873 stripStart :: Text -> Text 874 -- entered oncestripStart = dropWhile isSpace 875 {-# INLINE [1] stripStart #-} 876 877 -- | /O(n)/ Remove trailing white space from a string. Equivalent to: 878 -- 879 -- > dropWhileEnd isSpace 880 stripEnd :: Text -> Text 881 -- entered oncestripEnd = dropWhileEnd isSpace 882 {-# INLINE [1] stripEnd #-} 883 884 -- | /O(n)/ Remove leading and trailing white space from a string. 885 -- Equivalent to: 886 -- 887 -- > dropAround isSpace 888 strip :: Text -> Text 889 -- entered oncestrip = dropAround isSpace 890 {-# INLINE [1] strip #-} 891 892 -- | /O(n)/ 'splitAt' @n t@ returns a pair whose first element is a 893 -- prefix of @t@ of length @n@, and whose second is the remainder of 894 -- the string. It is equivalent to @('take' n t, 'drop' n t)@. 895 splitAt :: Int64 -> Text -> (Text, Text) 896 -- entered 6030 timessplitAt = loop 897 where loop _ Empty = (empty, empty) 898 loop n t | n <= 0 = (empty, t) 899 loop n (Chunk t ts) 900 | n < len = let (t',t'') = T.splitAt (fromIntegral n) t 901 in (Chunk t' Empty, Chunk t'' ts) 902 | otherwise = let (ts',ts'') = loop (n - len) ts 903 in (Chunk t ts', ts'') 904 where len = fromIntegral (T.length t) 905 906 -- | /O(n)/ 'splitAtWord' @n t@ returns a strict pair whose first 907 -- element is a prefix of @t@ whose chunks contain @n@ 'Word16' 908 -- values, and whose second is the remainder of the string. 909 splitAtWord :: Int64 -> Text -> PairS Text Text 910 -- entered 4585 timessplitAtWord _ Empty = empty :*: empty 911 splitAtWord x (Chunk c@(T.Text arr off len) cs) 912 | y >= len = let h :*: t = splitAtWord (x-fromIntegral len) cs 913 in Chunk c h :*: t 914 | otherwise = chunk (textP arr off y) empty :*: 915 chunk (textP arr (off+y) (len-y)) cs 916 where y = fromIntegral x 917 918 -- | /O(n+m)/ Find the first instance of @needle@ (which must be 919 -- non-'null') in @haystack@. The first element of the returned tuple 920 -- is the prefix of @haystack@ before @needle@ is matched. The second 921 -- is the remainder of @haystack@, starting with the match. 922 -- 923 -- Examples: 924 -- 925 -- > break "::" "a::b::c" ==> ("a", "::b::c") 926 -- > break "/" "foobar" ==> ("foobar", "") 927 -- 928 -- Laws: 929 -- 930 -- > append prefix match == haystack 931 -- > where (prefix, match) = break needle haystack 932 -- 933 -- If you need to break a string by a substring repeatedly (e.g. you 934 -- want to break on every instance of a substring), use 'find' 935 -- instead, as it has lower startup overhead. 936 -- 937 -- This function is strict in its first argument, and lazy in its 938 -- second. 939 -- 940 -- In (unlikely) bad cases, this function's time complexity degrades 941 -- towards /O(n*m)/. 942 break :: Text -> Text -> (Text, Text) 943 -- entered 300 timesbreak pat src 944 | null pat = emptyError "break" 945 | otherwise = case indices pat src of 946 [] -> (src, empty) 947 (x:_) -> let h :*: t = splitAtWord x src 948 in (h, t) 949 950 -- | /O(n+m)/ Similar to 'break', but searches from the end of the string. 951 -- 952 -- The first element of the returned tuple is the prefix of @haystack@ 953 -- up to and including the last match of @needle@. The second is the 954 -- remainder of @haystack@, following the match. 955 -- 956 -- > breakEnd "::" "a::b::c" ==> ("a::b::", "c") 957 breakEnd :: Text -> Text -> (Text, Text) 958 -- entered 100 timesbreakEnd pat src = let (a,b) = break (reverse pat) (reverse src) 959 in (reverse b, reverse a) 960 {-# INLINE breakEnd #-} 961 962 -- | /O(n+m)/ Find all non-overlapping instances of @needle@ in 963 -- @haystack@. Each element of the returned list consists of a pair: 964 -- 965 -- * The entire string prior to the /k/th match (i.e. the prefix) 966 -- 967 -- * The /k/th match, followed by the remainder of the string 968 -- 969 -- Examples: 970 -- 971 -- > find "::" "" 972 -- > ==> [] 973 -- > find "/" "a/b/c/" 974 -- > ==> [("a", "/b/c/"), ("a/b", "/c/"), ("a/b/c", "/")] 975 -- 976 -- This function is strict in its first argument, and lazy in its 977 -- second. 978 -- 979 -- In (unlikely) bad cases, this function's time complexity degrades 980 -- towards /O(n*m)/. 981 -- 982 -- The @needle@ parameter may not be empty. 983 find :: Text -- ^ @needle@ to search for 984 -> Text -- ^ @haystack@ in which to search 985 -> [(Text, Text)] 986 -- entered 300 timesfind pat src 987 | null pat = emptyError "find" 988 | otherwise = go 0 empty src (indices pat src) 989 where 990 go !n p s (x:xs) = let h :*: t = splitAtWord (x-n) s 991 h' = append p h 992 in (h',t) : go x h' t xs 993 go _ _ _ _ = [] 994 995 -- | /O(n)/ 'breakBy' is like 'spanBy', but the prefix returned is over 996 -- elements that fail the predicate @p@. 997 breakBy :: (Char -> Bool) -> Text -> (Text, Text) 998 -- entered 11,569 timesbreakBy p t0 = break' t0 999 where break' Empty = (empty, empty) 1000 break' c@(Chunk t ts) = 1001 case T.findIndex p t of 1002 Nothing -> let (ts', ts'') = break' ts 1003 in (Chunk t ts', ts'') 1004 Just n | n == 0 -> (Empty, c) 1005 | otherwise -> let (a,b) = T.splitAt n t 1006 in (Chunk a Empty, Chunk b ts) 1007 1008 -- | /O(n)/ 'spanBy', applied to a predicate @p@ and text @t@, returns 1009 -- a pair whose first element is the longest prefix (possibly empty) 1010 -- of @t@ of elements that satisfy @p@, and whose second is the 1011 -- remainder of the list. 1012 spanBy :: (Char -> Bool) -> Text -> (Text, Text) 1013 -- entered 10,483 timesspanBy p = breakBy (not . p) 1014 {-# INLINE spanBy #-} 1015 1016 -- | The 'group' function takes a 'Text' and returns a list of 'Text's 1017 -- such that the concatenation of the result is equal to the argument. 1018 -- Moreover, each sublist in the result contains only equal elements. 1019 -- For example, 1020 -- 1021 -- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"] 1022 -- 1023 -- It is a special case of 'groupBy', which allows the programmer to 1024 -- supply their own equality test. 1025 group :: Text -> [Text] 1026 -- entered 400 timesgroup = groupBy (==) 1027 {-# INLINE group #-} 1028 1029 -- | The 'groupBy' function is the non-overloaded version of 'group'. 1030 groupBy :: (Char -> Char -> Bool) -> Text -> [Text] 1031 -- entered 11,183 timesgroupBy _ Empty = [] 1032 groupBy eq (Chunk t ts) = cons x ys : groupBy eq zs 1033 where (ys,zs) = spanBy (eq x) xs 1034 x = T.unsafeHead t 1035 xs = chunk (T.unsafeTail t) ts 1036 1037 -- | /O(n)/ Return all initial segments of the given 'Text', 1038 -- shortest first. 1039 inits :: Text -> [Text] 1040 -- entered onceinits = (Empty :) . inits' 1041 where inits' Empty = [] 1042 inits' (Chunk t ts) = L.map (\t' -> Chunk t' Empty) (L.tail (T.inits t)) 1043 ++ L.map (Chunk t) (inits' ts) 1044 1045 -- | /O(n)/ Return all final segments of the given 'Text', longest 1046 -- first. 1047 tails :: Text -> [Text] 1048 -- entered 7138 timestails Empty = Empty : [] 1049 tails ts@(Chunk t ts') 1050 | T.length t == 1 = ts : tails ts' 1051 | otherwise = ts : tails (Chunk (T.unsafeTail t) ts') 1052 1053 -- $split 1054 -- 1055 -- Splitting functions in this library do not perform character-wise 1056 -- copies to create substrings; they just construct new 'Text's that 1057 -- are slices of the original. 1058 1059 -- | /O(m+n)/ Break a 'Text' into pieces separated by the first 1060 -- 'Text' argument, consuming the delimiter. An empty delimiter is 1061 -- invalid, and will cause an error to be raised. 1062 -- 1063 -- Examples: 1064 -- 1065 -- > split "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"] 1066 -- > split "aaa" "aaaXaaaXaaaXaaa" == ["","X","X","X",""] 1067 -- > split "x" "x" == ["",""] 1068 -- 1069 -- and 1070 -- 1071 -- > intercalate s . split s == id 1072 -- > split (singleton c) == splitBy (==c) 1073 -- 1074 -- This function is strict in its first argument, and lazy in its 1075 -- second. 1076 -- 1077 -- In (unlikely) bad cases, this function's time complexity degrades 1078 -- towards /O(n*m)/. 1079 split :: Text -- ^ Text to split on 1080 -> Text -- ^ Input text 1081 -> [Text] 1082 -- entered 700 timessplit pat src 1083 | null pat = emptyError "split" 1084 | isSingleton pat = splitBy (== head pat) src 1085 | otherwise = go 0 (indices pat src) src 1086 where 1087 go _ [] cs = [cs] 1088 go !i (x:xs) cs = let h :*: t = splitAtWord (x-i) cs 1089 in h : go (x+l) xs (dropWords l t) 1090 l = foldlChunks (\a (T.Text _ _ b) -> a + fromIntegral b) 0 pat 1091 {-# INLINE [1] split #-} 1092 1093 {-# RULES 1094 "LAZY TEXT split/singleton -> splitBy/==" [~1] forall c t. 1095 split (singleton c) t = splitBy (==c) t 1096 #-} 1097 1098 -- | /O(n)/ Splits a 'Text' into components delimited by separators, 1099 -- where the predicate returns True for a separator element. The 1100 -- resulting components do not contain the separators. Two adjacent 1101 -- separators result in an empty component in the output. eg. 1102 -- 1103 -- > splitBy (=='a') "aabbaca" == ["","","bb","c",""] 1104 -- > splitBy (=='a') [] == [""] 1105 splitBy :: (Char -> Bool) -> Text -> [Text] 1106 -- entered 830 timessplitBy _ Empty = [Empty] 1107 splitBy p (Chunk t0 ts0) = comb [] (T.splitBy p t0) ts0 1108 where comb acc (s:[]) Empty = revChunks (s:acc) : [] 1109 comb acc (s:[]) (Chunk t ts) = comb (s:acc) (T.splitBy p t) ts 1110 comb acc (s:ss) ts = revChunks (s:acc) : comb [] ss ts 1111 comb _ [] _ = impossibleError "splitBy" 1112 {-# INLINE splitBy #-} 1113 1114 -- | /O(n)/ Splits a 'Text' into components of length @k@. The last 1115 -- element may be shorter than the other chunks, depending on the 1116 -- length of the input. Examples: 1117 -- 1118 -- > chunksOf 3 "foobarbaz" == ["foo","bar","baz"] 1119 -- > chunksOf 4 "haskell.org" == ["hask","ell.","org"] 1120 chunksOf :: Int64 -> Text -> [Text] 1121 -- entered 100 timeschunksOf k = go 1122 where 1123 go t = case splitAt k t of 1124 (a,b) | null a -> [] 1125 | otherwise -> a : go b 1126 {-# INLINE chunksOf #-} 1127 1128 -- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at 1129 -- newline 'Char's. The resulting strings do not contain newlines. 1130 lines :: Text -> [Text] 1131 -- entered 427 timeslines Empty = [] 1132 lines t = let (l,t') = breakBy ((==) '\n') t 1133 in l : if null t' then [] 1134 else lines (tail t') 1135 1136 -- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's 1137 -- representing white space. 1138 words :: Text -> [Text] 1139 -- entered 400 timeswords = L.filter (not . null) . splitBy isSpace 1140 {-# INLINE words #-} 1141 1142 -- | /O(n)/ Joins lines, after appending a terminating newline to 1143 -- each. 1144 unlines :: [Text] -> Text 1145 -- entered 101 timesunlines = concat . L.map (`snoc` '\n') 1146 {-# INLINE unlines #-} 1147 1148 -- | /O(n)/ Joins words using single space characters. 1149 unwords :: [Text] -> Text 1150 -- entered 100 timesunwords = intercalate (singleton ' ') 1151 {-# INLINE unwords #-} 1152 1153 -- | /O(n)/ The 'isPrefixOf' function takes two 'Text's and returns 1154 -- 'True' iff the first is a prefix of the second. This function is 1155 -- subject to fusion. 1156 isPrefixOf :: Text -> Text -> Bool 1157 -- entered 2995 timesisPrefixOf Empty _ = True 1158 isPrefixOf _ Empty = False 1159 isPrefixOf (Chunk x xs) (Chunk y ys) 1160 | lx == ly = x == y && isPrefixOf xs ys 1161 | lx < ly = x == yh && isPrefixOf xs (Chunk yt ys) 1162 | otherwise = xh == y && isPrefixOf (Chunk xt xs) ys 1163 where (xh,xt) = T.splitAt ly x 1164 (yh,yt) = T.splitAt lx y 1165 lx = T.length x 1166 ly = T.length y 1167 {-# INLINE [1] isPrefixOf #-} 1168 1169 {-# RULES 1170 "LAZY TEXT isPrefixOf -> fused" [~1] forall s t. 1171 isPrefixOf s t = S.isPrefixOf (stream s) (stream t) 1172 "LAZY TEXT isPrefixOf -> unfused" [1] forall s t. 1173 S.isPrefixOf (stream s) (stream t) = isPrefixOf s t 1174 #-} 1175 1176 -- | /O(n)/ The 'isSuffixOf' function takes two 'Text's and returns 1177 -- 'True' iff the first is a suffix of the second. 1178 isSuffixOf :: Text -> Text -> Bool 1179 -- entered 800 timesisSuffixOf x y = reverse x `isPrefixOf` reverse y 1180 {-# INLINE isSuffixOf #-} 1181 -- TODO: a better implementation 1182 1183 -- | /O(n+m)/ The 'isInfixOf' function takes two 'Text's and returns 1184 -- 'True' iff the first is contained, wholly and intact, anywhere 1185 -- within the second. 1186 -- 1187 -- This function is strict in its first argument, and lazy in its 1188 -- second. 1189 -- 1190 -- In (unlikely) bad cases, this function's time complexity degrades 1191 -- towards /O(n*m)/. 1192 isInfixOf :: Text -> Text -> Bool 1193 -- entered 400 timesisInfixOf needle haystack 1194 | null needle = True 1195 | isSingleton needle = S.elem (head needle) . S.stream $ haystack 1196 | otherwise = not . L.null . indices needle $ haystack 1197 {-# INLINE [1] isInfixOf #-} 1198 1199 {-# RULES 1200 "LAZY TEXT isInfixOf/singleton -> S.elem/S.stream" [~1] forall n h. 1201 isInfixOf (singleton n) h = S.elem n (S.stream h) 1202 #-} 1203 1204 ------------------------------------------------------------------------------- 1205 -- * View patterns 1206 1207 -- | /O(n)/ Returns the suffix of the second string if its prefix 1208 -- matches the first. 1209 -- 1210 -- Examples: 1211 -- 1212 -- > prefixed "foo" "foobar" == Just "bar" 1213 -- > prefixed "foo" "quux" == Nothing 1214 -- 1215 -- This is particularly useful with the @ViewPatterns@ extension to 1216 -- GHC, as follows: 1217 -- 1218 -- > {-# LANGUAGE ViewPatterns #-} 1219 -- > import Data.Text as T 1220 -- > 1221 -- > fnordLength :: Text -> Int 1222 -- > fnordLength (prefixed "fnord" -> Just suf) = T.length suf 1223 -- > fnordLength _ = -1 1224 prefixed :: Text -> Text -> Maybe Text 1225 -- Yes, this could be much more efficient. 1226 -- entered 400 timesprefixed p t 1227 | p `isPrefixOf` t = Just (drop (length p) t) 1228 | otherwise = Nothing 1229 1230 -- | /O(n)/ Returns the prefix of the second string if its suffix 1231 -- matches the first. 1232 -- 1233 -- Examples: 1234 -- 1235 -- > suffixed "bar" "foobar" == Just "foo" 1236 -- > suffixed "foo" "quux" == Nothing 1237 -- 1238 -- This is particularly useful with the @ViewPatterns@ extension to 1239 -- GHC, as follows: 1240 -- 1241 -- > {-# LANGUAGE ViewPatterns #-} 1242 -- > import Data.Text as T 1243 -- > 1244 -- > quuxLength :: Text -> Int 1245 -- > quuxLength (suffixed "quux" -> Just pre) = T.length pre 1246 -- > quuxLength _ = -1 1247 suffixed :: Text -> Text -> Maybe Text 1248 -- Yes, this could be much more efficient. 1249 -- entered 400 timessuffixed p t 1250 | p `isSuffixOf` t = Just (take (length t - length p) t) 1251 | otherwise = Nothing 1252 1253 -- | /O(n)/ 'filter', applied to a predicate and a 'Text', 1254 -- returns a 'Text' containing those characters that satisfy the 1255 -- predicate. 1256 filter :: (Char -> Bool) -> Text -> Text 1257 -- entered 3912 timesfilter p t = unstream (S.filter p (stream t)) 1258 {-# INLINE filter #-} 1259 1260 -- | /O(n)/ The 'findBy' function takes a predicate and a 'Text', and 1261 -- returns the first element in matching the predicate, or 'Nothing' 1262 -- if there is no such element. 1263 findBy :: (Char -> Bool) -> Text -> Maybe Char 1264 -- entered 400 timesfindBy p t = S.findBy p (stream t) 1265 {-# INLINE findBy #-} 1266 1267 -- | /O(n)/ The 'partitionBy' function takes a predicate and a 'Text', 1268 -- and returns the pair of 'Text's with elements which do and do not 1269 -- satisfy the predicate, respectively; i.e. 1270 -- 1271 -- > partitionBy p t == (filter p t, filter (not . p) t) 1272 partitionBy :: (Char -> Bool) -> Text -> (Text, Text) 1273 -- entered 400 timespartitionBy p t = (filter p t, filter (not . p) t) 1274 {-# INLINE partitionBy #-} 1275 1276 -- | /O(n)/ 'Text' index (subscript) operator, starting from 0. 1277 index :: Text -> Int64 -> Char 1278 -- entered 100 timesindex t n = S.index (stream t) n 1279 {-# INLINE index #-} 1280 1281 -- | /O(n+m)/ The 'count' function returns the number of times the 1282 -- query string appears in the given 'Text'. An empty query string is 1283 -- invalid, and will cause an error to be raised. 1284 -- 1285 -- In (unlikely) bad cases, this function's time complexity degrades 1286 -- towards /O(n*m)/. 1287 count :: Text -> Text -> Int64 1288 -- entered 200 timescount pat src 1289 | null pat = emptyError "count" 1290 | otherwise = go 0 (indices pat src) 1291 where go !n [] = n 1292 go !n (_:xs) = go (n+1) xs 1293 {-# INLINE [1] count #-} 1294 1295 {-# RULES 1296 "LAZY TEXT count/singleton -> countChar" [~1] forall c t. 1297 count (singleton c) t = countChar c t 1298 #-} 1299 1300 -- | /O(n)/ The 'countChar' function returns the number of times the 1301 -- query element appears in the given 'Text'. This function is subject 1302 -- to fusion. 1303 countChar :: Char -> Text -> Int64 1304 -- never enteredcountChar c t = S.countChar c (stream t) 1305 1306 -- | /O(n)/ 'zip' takes two 'Text's and returns a list of 1307 -- corresponding pairs of bytes. If one input 'Text' is short, 1308 -- excess elements of the longer 'Text' are discarded. This is 1309 -- equivalent to a pair of 'unpack' operations. 1310 zip :: Text -> Text -> [(Char,Char)] 1311 -- entered 400 timeszip a b = S.unstreamList $ S.zipWith (,) (stream a) (stream b) 1312 {-# INLINE [0] zip #-} 1313 1314 -- | /O(n)/ 'zipWith' generalises 'zip' by zipping with the function 1315 -- given as the first argument, instead of a tupling function. 1316 zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text 1317 -- entered 400 timeszipWith f t1 t2 = unstream (S.zipWith f (stream t1) (stream t2)) 1318 {-# INLINE [0] zipWith #-} 1319 1320 revChunks :: [T.Text] -> Text 1321 -- entered 4936 timesrevChunks = L.foldl' (flip chunk) Empty 1322 1323 emptyError :: String -> a 1324 -- entered 45 timesemptyError fun = P.error ("Data.Text.Lazy." ++ fun ++ ": empty input") 1325 1326 impossibleError :: String -> a 1327 -- never enteredimpossibleError fun = P.error ("Data.Text.Lazy." ++ fun ++ ": impossible case")