1 {-# LANGUAGE BangPatterns, CPP, Rank2Types #-} 2 {-# OPTIONS_GHC -fno-warn-orphans #-} 3 4 -- | 5 -- Module : Data.Text 6 -- Copyright : (c) 2008, 2009 Tom Harper, 7 -- (c) 2009, 2010 Bryan O'Sullivan, 8 -- (c) 2009 Duncan Coutts 9 -- 10 -- License : BSD-style 11 -- Maintainer : bos@serpentine.com, rtomharper@googlemail.com, 12 -- duncan@haskell.org 13 -- Stability : experimental 14 -- Portability : GHC 15 -- 16 -- A time and space-efficient implementation of Unicode text using 17 -- packed Word16 arrays. Suitable for performance critical use, both 18 -- in terms of large data quantities and high speed. 19 -- 20 -- This module is intended to be imported @qualified@, to avoid name 21 -- clashes with "Prelude" functions, e.g. 22 -- 23 -- > import qualified Data.Text as T 24 25 module Data.Text 26 ( 27 -- * Fusion 28 -- $fusion 29 30 -- * Types 31 Text 32 33 -- * Creation and elimination 34 , pack 35 , unpack 36 , singleton 37 , empty 38 39 -- * Basic interface 40 , cons 41 , snoc 42 , append 43 , uncons 44 , head 45 , last 46 , tail 47 , init 48 , null 49 , length 50 , compareLength 51 52 -- * Transformations 53 , map 54 , intercalate 55 , intersperse 56 , transpose 57 , reverse 58 , replace 59 60 -- ** Case conversion 61 -- $case 62 , toCaseFold 63 , toLower 64 , toUpper 65 66 -- ** Justification 67 , justifyLeft 68 , justifyRight 69 , center 70 71 -- * Folds 72 , foldl 73 , foldl' 74 , foldl1 75 , foldl1' 76 , foldr 77 , foldr1 78 79 -- ** Special folds 80 , concat 81 , concatMap 82 , any 83 , all 84 , maximum 85 , minimum 86 87 -- * Construction 88 89 -- ** Scans 90 , scanl 91 , scanl1 92 , scanr 93 , scanr1 94 95 -- ** Accumulating maps 96 , mapAccumL 97 , mapAccumR 98 99 -- ** Generation and unfolding 100 , replicate 101 , unfoldr 102 , unfoldrN 103 104 -- * Substrings 105 106 -- ** Breaking strings 107 , take 108 , drop 109 , takeWhile 110 , dropWhile 111 , dropWhileEnd 112 , dropAround 113 , strip 114 , stripStart 115 , stripEnd 116 , splitAt 117 , spanBy 118 , break 119 , breakEnd 120 , breakBy 121 , group 122 , groupBy 123 , inits 124 , tails 125 126 -- ** Breaking into many substrings 127 -- $split 128 , split 129 , splitBy 130 , chunksOf 131 132 -- ** Breaking into lines and words 133 , lines 134 --, lines' 135 , words 136 , unlines 137 , unwords 138 139 -- * Predicates 140 , isPrefixOf 141 , isSuffixOf 142 , isInfixOf 143 144 -- ** View patterns 145 , prefixed 146 , suffixed 147 148 -- * Searching 149 , filter 150 , find 151 , findBy 152 , partitionBy 153 154 -- , findSubstring 155 156 -- * Indexing 157 -- $index 158 , index 159 , findIndex 160 , count 161 162 -- * Zipping and unzipping 163 , zip 164 , zipWith 165 166 -- -* Ordered text 167 -- , sort 168 ) where 169 170 import Prelude (Char, Bool(..), Functor(..), Int, Maybe(..), String, 171 Eq(..), Ord(..), Ordering(..), (++), 172 Read(..), Show(..), 173 (&&), (||), (+), (-), (.), ($), ($!), (>>), (*), 174 div, error, not, return, otherwise) 175 #if defined(HAVE_DEEPSEQ) 176 import Control.DeepSeq (NFData) 177 #endif 178 #if defined(ASSERTS) 179 import Control.Exception (assert) 180 #endif 181 import Data.Char (isSpace) 182 import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf)) 183 #if __GLASGOW_HASKELL__ >= 612 184 import Data.Data (mkNoRepType) 185 #else 186 import Data.Data (mkNorepType) 187 #endif 188 import Control.Monad (foldM) 189 import qualified Data.Text.Array as A 190 import qualified Data.List as L 191 import Data.Monoid (Monoid(..)) 192 import Data.String (IsString(..)) 193 import qualified Data.Text.Fusion as S 194 import qualified Data.Text.Fusion.Common as S 195 import Data.Text.Fusion (stream, reverseStream, unstream) 196 import Data.Text.Internal (Text(..), empty, text, textP) 197 import qualified Prelude as P 198 import Data.Text.Unsafe (Iter(..), iter, iter_, lengthWord16, reverseIter, 199 unsafeHead, unsafeTail) 200 import Data.Text.UnsafeChar (unsafeChr) 201 import qualified Data.Text.Encoding.Utf16 as U16 202 import Data.Text.Search (indices) 203 204 -- $fusion 205 -- 206 -- Most of the functions in this module are subject to /fusion/, 207 -- meaning that a pipeline of such functions will usually allocate at 208 -- most one 'Text' value. 209 210 instance Eq Text where 211 -- entered 7765 timest1 == t2 = stream t1 == stream t2 212 {-# INLINE (==) #-} 213 214 instance Ord Text where 215 -- entered 100 timescompare t1 t2 = compare (stream t1) (stream t2) 216 {-# INLINE compare #-} 217 218 instance Show Text where 219 -- entered 100 timesshowsPrec p ps r = showsPrec p (unpack ps) r 220 221 instance Read Text where 222 -- entered 100 timesreadsPrec p str = [(pack x,y) | (x,y) <- readsPrec p str] 223 224 instance Monoid Text where 225 -- never enteredmempty = empty 226 -- entered 100 timesmappend = append 227 -- entered 100 timesmconcat = concat 228 229 instance IsString Text where 230 -- entered 401 timesfromString = pack 231 232 #if defined(HAVE_DEEPSEQ) 233 instance NFData Text 234 #endif 235 236 -- This instance preserves data abstraction at the cost of inefficiency. 237 -- We omit reflection services for the sake of data abstraction. 238 -- 239 -- This instance was created by copying the behavior of Data.Set and 240 -- Data.Map. If you feel a mistake has been made, please feel free to 241 -- submit improvements. 242 -- 243 -- Original discussion is archived here: 244 245 -- "could we get a Data instance for Data.Text.Text?" 246 -- http://groups.google.com/group/haskell-cafe/browse_thread/thread/b5bbb1b28a7e525d/0639d46852575b93 247 248 instance Data Text where 249 -- never enteredgfoldl f z txt = z pack `f` (unpack txt) 250 -- never enteredtoConstr _ = error "Data.Text.Text.toConstr" 251 -- never enteredgunfold _ _ = error "Data.Text.Text.gunfold" 252 #if __GLASGOW_HASKELL__ >= 612 253 -- never entereddataTypeOf _ = mkNoRepType "Data.Text.Text" 254 #else 255 dataTypeOf _ = mkNorepType "Data.Text.Text" 256 #endif 257 258 -- ----------------------------------------------------------------------------- 259 -- * Conversion to/from 'Text' 260 261 -- | /O(n)/ Convert a 'String' into a 'Text'. Subject to fusion. 262 pack :: String -> Text 263 -- entered 52,163 timespack = unstream . S.streamList 264 {-# INLINE [1] pack #-} 265 266 -- | /O(n)/ Convert a Text into a String. Subject to fusion. 267 unpack :: Text -> String 268 -- entered 21,661 timesunpack = S.unstreamList . stream 269 {-# INLINE [1] unpack #-} 270 271 -- | /O(1)/ Convert a character into a Text. 272 -- Subject to fusion. 273 singleton :: Char -> Text 274 -- entered 803 timessingleton = unstream . S.singleton 275 {-# INLINE [1] singleton #-} 276 277 -- ----------------------------------------------------------------------------- 278 -- * Basic functions 279 280 -- | /O(n)/ Adds a character to the front of a 'Text'. This function 281 -- is more costly than its 'List' counterpart because it requires 282 -- copying a new array. Subject to fusion. 283 cons :: Char -> Text -> Text 284 -- entered 14,455 timescons c t = unstream (S.cons c (stream t)) 285 {-# INLINE cons #-} 286 287 -- | /O(n)/ Adds a character to the end of a 'Text'. This copies the 288 -- entire array in the process, unless fused. Subject to fusion. 289 snoc :: Text -> Char -> Text 290 -- entered 5340 timessnoc t c = unstream (S.snoc (stream t) c) 291 {-# INLINE snoc #-} 292 293 -- | /O(n)/ Appends one 'Text' to the other by copying both of them 294 -- into a new 'Text'. Subject to fusion. 295 append :: Text -> Text -> Text 296 -- entered 2355 timesappend (Text arr1 off1 len1) (Text arr2 off2 len2) = Text (A.run x) 0 len 297 where 298 len = len1+len2 299 x = do 300 arr <- A.unsafeNew len 301 A.copyI arr 0 arr1 off1 len1 302 A.copyI arr len1 arr2 off2 (len1+len2) 303 return arr 304 {-# INLINE append #-} 305 306 {-# RULES 307 "TEXT append -> fused" [~1] forall t1 t2. 308 append t1 t2 = unstream (S.append (stream t1) (stream t2)) 309 "TEXT append -> unfused" [1] forall t1 t2. 310 unstream (S.append (stream t1) (stream t2)) = append t1 t2 311 #-} 312 313 -- | /O(1)/ Returns the first character of a 'Text', which must be 314 -- non-empty. Subject to fusion. 315 head :: Text -> Char 316 -- entered 400 timeshead t = S.head (stream t) 317 {-# INLINE head #-} 318 319 -- | /O(1)/ Returns the first character and rest of a 'Text', or 320 -- 'Nothing' if empty. Subject to fusion. 321 uncons :: Text -> Maybe (Char, Text) 322 -- entered 400 timesuncons t@(Text arr off len) 323 | len <= 0 = Nothing 324 | otherwise = Just (c, textP arr (off+d) (len-d)) 325 where Iter c d = iter t 0 326 {-# INLINE [1] uncons #-} 327 328 -- | Lifted from Control.Arrow and specialized. 329 second :: (b -> c) -> (a,b) -> (a,c) 330 -- entered 14,550 timessecond f (a, b) = (a, f b) 331 332 {-# RULES 333 "TEXT uncons -> fused" [~1] forall t. 334 uncons t = fmap (second unstream) (S.uncons (stream t)) 335 "TEXT uncons -> unfused" [1] forall t. 336 fmap (second unstream) (S.uncons (stream t)) = uncons t 337 #-} 338 339 -- | /O(1)/ Returns the last character of a 'Text', which must be 340 -- non-empty. Subject to fusion. 341 last :: Text -> Char 342 -- entered 1507 timeslast (Text arr off len) 343 | len <= 0 = emptyError "last" 344 | n < 0xDC00 || n > 0xDFFF = unsafeChr n 345 | otherwise = U16.chr2 n0 n 346 where n = A.unsafeIndex arr (off+len-1) 347 n0 = A.unsafeIndex arr (off+len-2) 348 {-# INLINE [1] last #-} 349 350 {-# RULES 351 "TEXT last -> fused" [~1] forall t. 352 last t = S.last (stream t) 353 "TEXT last -> unfused" [1] forall t. 354 S.last (stream t) = last t 355 #-} 356 357 -- | /O(1)/ Returns all characters after the head of a 'Text', which 358 -- must be non-empty. Subject to fusion. 359 tail :: Text -> Text 360 -- entered 758 timestail t@(Text arr off len) 361 | len <= 0 = emptyError "tail" 362 | otherwise = textP arr (off+d) (len-d) 363 where d = iter_ t 0 364 {-# INLINE [1] tail #-} 365 366 {-# RULES 367 "TEXT tail -> fused" [~1] forall t. 368 tail t = unstream (S.tail (stream t)) 369 "TEXT tail -> unfused" [1] forall t. 370 unstream (S.tail (stream t)) = tail t 371 #-} 372 373 -- | /O(1)/ Returns all but the last character of a 'Text', which must 374 -- be non-empty. Subject to fusion. 375 init :: Text -> Text 376 -- entered 1138 timesinit (Text arr off len) | len <= 0 = emptyError "init" 377 | n >= 0xDC00 && n <= 0xDFFF = textP arr off (len-2) 378 | otherwise = textP arr off (len-1) 379 where 380 n = A.unsafeIndex arr (off+len-1) 381 {-# INLINE [1] init #-} 382 383 {-# RULES 384 "TEXT init -> fused" [~1] forall t. 385 init t = unstream (S.init (stream t)) 386 "TEXT init -> unfused" [1] forall t. 387 unstream (S.init (stream t)) = init t 388 #-} 389 390 -- | /O(1)/ Tests whether a 'Text' is empty or not. Subject to 391 -- fusion. 392 null :: Text -> Bool 393 -- entered 63,523 timesnull (Text _arr _off len) = 394 #if defined(ASSERTS) 395 assert (len >= 0) $ 396 #endif 397 len <= 0 398 {-# INLINE [1] null #-} 399 400 {-# RULES 401 "TEXT null -> fused" [~1] forall t. 402 null t = S.null (stream t) 403 "TEXT null -> unfused" [1] forall t. 404 S.null (stream t) = null t 405 #-} 406 407 -- | /O(1)/ Tests whether a 'Text' contains exactly one character. 408 -- Subject to fusion. 409 isSingleton :: Text -> Bool 410 -- entered 1596 timesisSingleton = S.isSingleton . stream 411 {-# INLINE isSingleton #-} 412 413 -- | /O(n)/ Returns the number of characters in a 'Text'. 414 -- Subject to fusion. 415 length :: Text -> Int 416 -- entered 37,788 timeslength t = S.length (stream t) 417 {-# INLINE length #-} 418 419 -- | /O(n)/ Compare the count of characters in a 'Text' to a number. 420 -- Subject to fusion. 421 -- 422 -- This function gives the same answer as comparing against the result 423 -- of 'length', but can short circuit if the count of characters is 424 -- greater than the number, and hence be more efficient. 425 compareLength :: Text -> Int -> Ordering 426 -- entered 100 timescompareLength t n = S.compareLengthI (stream t) n 427 {-# INLINE [1] compareLength #-} 428 429 {-# RULES 430 "TEXT compareN/length -> compareLength" [~1] forall t n. 431 compare (length t) n = compareLength t n 432 #-} 433 434 {-# RULES 435 "TEXT ==N/length -> compareLength/==EQ" [~1] forall t n. 436 (==) (length t) n = compareLength t n == EQ 437 #-} 438 439 {-# RULES 440 "TEXT /=N/length -> compareLength//=EQ" [~1] forall t n. 441 (/=) (length t) n = compareLength t n /= EQ 442 #-} 443 444 {-# RULES 445 "TEXT <N/length -> compareLength/==LT" [~1] forall t n. 446 (<) (length t) n = compareLength t n == LT 447 #-} 448 449 {-# RULES 450 "TEXT <=N/length -> compareLength//=GT" [~1] forall t n. 451 (<=) (length t) n = compareLength t n /= GT 452 #-} 453 454 {-# RULES 455 "TEXT >N/length -> compareLength/==GT" [~1] forall t n. 456 (>) (length t) n = compareLength t n == GT 457 #-} 458 459 {-# RULES 460 "TEXT >=N/length -> compareLength//=LT" [~1] forall t n. 461 (>=) (length t) n = compareLength t n /= LT 462 #-} 463 464 -- ----------------------------------------------------------------------------- 465 -- * Transformations 466 -- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to 467 -- each element of @t@. Subject to fusion. 468 map :: (Char -> Char) -> Text -> Text 469 -- entered 500 timesmap f t = unstream (S.map f (stream t)) 470 {-# INLINE [1] map #-} 471 472 -- | /O(n)/ The 'intercalate' function takes a 'Text' and a list of 473 -- 'Text's and concatenates the list after interspersing the first 474 -- argument between each element of the list. 475 intercalate :: Text -> [Text] -> Text 476 -- entered 900 timesintercalate t = concat . (L.intersperse t) 477 {-# INLINE intercalate #-} 478 479 -- | /O(n)/ The 'intersperse' function takes a character and places it 480 -- between the characters of a 'Text'. Subject to fusion. 481 intersperse :: Char -> Text -> Text 482 -- entered 400 timesintersperse c t = unstream (S.intersperse c (stream t)) 483 {-# INLINE intersperse #-} 484 485 -- | /O(n)/ Reverse the characters of a string. Subject to fusion. 486 reverse :: Text -> Text 487 -- entered 4385 timesreverse t = S.reverse (stream t) 488 {-# INLINE reverse #-} 489 490 -- | /O(m+n)/ Replace every occurrence of one substring with another. 491 replace :: Text -- ^ Text to search for 492 -> Text -- ^ Replacement text 493 -> Text -- ^ Input text 494 -> Text 495 -- entered 100 timesreplace s d = intercalate d . split s 496 {-# INLINE replace #-} 497 498 -- ---------------------------------------------------------------------------- 499 -- ** Case conversions (folds) 500 501 -- $case 502 -- 503 -- When case converting 'Text' values, do not use combinators like 504 -- @map toUpper@ to case convert each character of a string 505 -- individually, as this gives incorrect results according to the 506 -- rules of some writing systems. The whole-string case conversion 507 -- functions from this module, such as @toUpper@, obey the correct 508 -- case conversion rules. As a result, these functions may map one 509 -- input character to two or three output characters. For examples, 510 -- see the documentation of each function. 511 -- 512 -- /Note/: In some languages, case conversion is a locale- and 513 -- context-dependent operation. The case conversion functions in this 514 -- module are /not/ locale sensitive. Programs that require locale 515 -- sensitivity should use appropriate versions of the case mapping 516 -- functions from the @text-icu@ package. 517 518 -- | /O(n)/ Convert a string to folded case. This function is mainly 519 -- useful for performing caseless (also known as case insensitive) 520 -- string comparisons. 521 -- 522 -- A string @x@ is a caseless match for a string @y@ if and only if: 523 -- 524 -- @toCaseFold x == toCaseFold y@ 525 -- 526 -- The result string may be longer than the input string, and may 527 -- differ from applying 'toLower' to the input string. For instance, 528 -- the Armenian small ligature \"ﬓ\" (men now, U+FB13) is case 529 -- folded to the sequence \"մ\" (men, U+0574) followed by 530 -- \"ն\" (now, U+0576), while the Greek \"µ\" (micro sign, 531 -- U+00B5) is case folded to \"μ\" (small letter mu, U+03BC) 532 -- instead of itself. 533 toCaseFold :: Text -> Text 534 -- entered 100 timestoCaseFold t = unstream (S.toCaseFold (stream t)) 535 {-# INLINE [0] toCaseFold #-} 536 537 -- | /O(n)/ Convert a string to lower case, using simple case 538 -- conversion. The result string may be longer than the input string. 539 -- For instance, \"İ\" (Latin capital letter I with dot above, 540 -- U+0130) maps to the sequence \"i\" (Latin small letter i, U+0069) followed 541 -- by \" ̇\" (combining dot above, U+0307). 542 toLower :: Text -> Text 543 -- entered 200 timestoLower t = unstream (S.toLower (stream t)) 544 {-# INLINE toLower #-} 545 546 -- | /O(n)/ Convert a string to upper case, using simple case 547 -- conversion. The result string may be longer than the input string. 548 -- For instance, the German \"ß\" (eszett, U+00DF) maps to the 549 -- two-letter sequence \"SS\". 550 toUpper :: Text -> Text 551 -- entered 200 timestoUpper t = unstream (S.toUpper (stream t)) 552 {-# INLINE toUpper #-} 553 554 -- | /O(n)/ Left-justify a string to the given length, using the 555 -- specified fill character on the right. Subject to fusion. Examples: 556 -- 557 -- > justifyLeft 7 'x' "foo" == "fooxxxx" 558 -- > justifyLeft 3 'x' "foobar" == "foobar" 559 justifyLeft :: Int -> Char -> Text -> Text 560 -- entered 400 timesjustifyLeft k c t 561 | len >= k = t 562 | otherwise = t `append` replicateChar (k-len) c 563 where len = length t 564 {-# INLINE [1] justifyLeft #-} 565 566 {-# RULES 567 "TEXT justifyLeft -> fused" [~1] forall k c t. 568 justifyLeft k c t = unstream (S.justifyLeftI k c (stream t)) 569 "TEXT justifyLeft -> unfused" [1] forall k c t. 570 unstream (S.justifyLeftI k c (stream t)) = justifyLeft k c t 571 #-} 572 573 -- | /O(n)/ Right-justify a string to the given length, using the 574 -- specified fill character on the left. Examples: 575 -- 576 -- > justifyRight 7 'x' "bar" == "xxxxbar" 577 -- > justifyRight 3 'x' "foobar" == "foobar" 578 justifyRight :: Int -> Char -> Text -> Text 579 -- entered 400 timesjustifyRight k c t 580 | len >= k = t 581 | otherwise = replicateChar (k-len) c `append` t 582 where len = length t 583 {-# INLINE justifyRight #-} 584 585 -- | /O(n)/ Center a string to the given length, using the 586 -- specified fill character on either side. Examples: 587 -- 588 -- > center 8 'x' "HS" = "xxxHSxxx" 589 center :: Int -> Char -> Text -> Text 590 -- entered 400 timescenter k c t 591 | len >= k = t 592 | otherwise = replicateChar l c `append` t `append` replicateChar r c 593 where len = length t 594 d = k - len 595 r = d `div` 2 596 l = d - r 597 {-# INLINE center #-} 598 599 -- | /O(n)/ The 'transpose' function transposes the rows and columns 600 -- of its 'Text' argument. Note that this function uses 'pack', 601 -- 'unpack', and the list version of transpose, and is thus not very 602 -- efficient. 603 transpose :: [Text] -> [Text] 604 -- entered 100 timestranspose ts = P.map pack (L.transpose (P.map unpack ts)) 605 606 -- ----------------------------------------------------------------------------- 607 -- * Reducing 'Text's (folds) 608 609 -- | /O(n)/ 'foldl', applied to a binary operator, a starting value 610 -- (typically the left-identity of the operator), and a 'Text', 611 -- reduces the 'Text' using the binary operator, from left to right. 612 -- Subject to fusion. 613 foldl :: (b -> Char -> b) -> b -> Text -> b 614 -- entered 400 timesfoldl f z t = S.foldl f z (stream t) 615 {-# INLINE foldl #-} 616 617 -- | /O(n)/ A strict version of 'foldl'. Subject to fusion. 618 foldl' :: (b -> Char -> b) -> b -> Text -> b 619 -- entered 400 timesfoldl' f z t = S.foldl' f z (stream t) 620 {-# INLINE foldl' #-} 621 622 -- | /O(n)/ A variant of 'foldl' that has no starting value argument, 623 -- and thus must be applied to a non-empty 'Text'. Subject to fusion. 624 foldl1 :: (Char -> Char -> Char) -> Text -> Char 625 -- entered 400 timesfoldl1 f t = S.foldl1 f (stream t) 626 {-# INLINE foldl1 #-} 627 628 -- | /O(n)/ A strict version of 'foldl1'. Subject to fusion. 629 foldl1' :: (Char -> Char -> Char) -> Text -> Char 630 -- entered 400 timesfoldl1' f t = S.foldl1' f (stream t) 631 {-# INLINE foldl1' #-} 632 633 -- | /O(n)/ 'foldr', applied to a binary operator, a starting value 634 -- (typically the right-identity of the operator), and a 'Text', 635 -- reduces the 'Text' using the binary operator, from right to left. 636 -- Subject to fusion. 637 foldr :: (Char -> b -> b) -> b -> Text -> b 638 -- entered 800 timesfoldr f z t = S.foldr f z (stream t) 639 {-# INLINE foldr #-} 640 641 -- | /O(n)/ A variant of 'foldr' that has no starting value argument, 642 -- and thust must be applied to a non-empty 'Text'. Subject to 643 -- fusion. 644 foldr1 :: (Char -> Char -> Char) -> Text -> Char 645 -- entered 400 timesfoldr1 f t = S.foldr1 f (stream t) 646 {-# INLINE foldr1 #-} 647 648 -- ----------------------------------------------------------------------------- 649 -- ** Special folds 650 651 -- | /O(n)/ Concatenate a list of 'Text's. 652 concat :: [Text] -> Text 653 -- entered 2488 timesconcat ts = case ts' of 654 [] -> empty 655 [t] -> t 656 _ -> Text (A.run go) 0 len 657 where 658 ts' = L.filter (not . null) ts 659 len = L.sum $ L.map lengthWord16 ts' 660 go = do 661 arr <- A.unsafeNew len 662 let step i (Text a o l) = 663 let !j = i + l in A.copyI arr i a o j >> return j 664 foldM step 0 ts' >> return arr 665 666 -- | /O(n)/ Map a function over a 'Text' that results in a 'Text', and 667 -- concatenate the results. 668 concatMap :: (Char -> Text) -> Text -> Text 669 -- entered 100 timesconcatMap f = concat . foldr ((:) . f) [] 670 {-# INLINE concatMap #-} 671 672 -- | /O(n)/ 'any' @p@ @t@ determines whether any character in the 673 -- 'Text' @t@ satisifes the predicate @p@. Subject to fusion. 674 any :: (Char -> Bool) -> Text -> Bool 675 -- entered 400 timesany p t = S.any p (stream t) 676 {-# INLINE any #-} 677 678 -- | /O(n)/ 'all' @p@ @t@ determines whether all characters in the 679 -- 'Text' @t@ satisify the predicate @p@. Subject to fusion. 680 all :: (Char -> Bool) -> Text -> Bool 681 -- entered 400 timesall p t = S.all p (stream t) 682 {-# INLINE all #-} 683 684 -- | /O(n)/ 'maximum' returns the maximum value from a 'Text', which 685 -- must be non-empty. Subject to fusion. 686 maximum :: Text -> Char 687 -- entered 400 timesmaximum t = S.maximum (stream t) 688 {-# INLINE maximum #-} 689 690 -- | /O(n)/ 'minimum' returns the minimum value from a 'Text', which 691 -- must be non-empty. Subject to fusion. 692 minimum :: Text -> Char 693 -- entered 400 timesminimum t = S.minimum (stream t) 694 {-# INLINE minimum #-} 695 696 -- ----------------------------------------------------------------------------- 697 -- * Building 'Text's 698 699 -- | /O(n)/ 'scanl' is similar to 'foldl', but returns a list of 700 -- successive reduced values from the left. Subject to fusion. 701 -- 702 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...] 703 -- 704 -- Note that 705 -- 706 -- > last (scanl f z xs) == foldl f z xs. 707 scanl :: (Char -> Char -> Char) -> Char -> Text -> Text 708 -- entered 759 timesscanl f z t = unstream (S.scanl f z (stream t)) 709 {-# INLINE scanl #-} 710 711 -- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting 712 -- value argument. Subject to fusion. 713 -- 714 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] 715 scanl1 :: (Char -> Char -> Char) -> Text -> Text 716 -- entered 400 timesscanl1 f t | null t = empty 717 | otherwise = scanl f (unsafeHead t) (unsafeTail t) 718 {-# INLINE scanl1 #-} 719 720 -- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'. 721 -- 722 -- > scanr f v == reverse . scanl (flip f) v . reverse 723 scanr :: (Char -> Char -> Char) -> Char -> Text -> Text 724 -- entered 467 timesscanr f z = S.reverse . S.reverseScanr f z . reverseStream 725 {-# INLINE scanr #-} 726 727 -- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting 728 -- value argument. Subject to fusion. 729 scanr1 :: (Char -> Char -> Char) -> Text -> Text 730 -- entered 400 timesscanr1 f t | null t = empty 731 | otherwise = scanr f (last t) (init t) 732 {-# INLINE scanr1 #-} 733 734 -- | /O(n)/ Like a combination of 'map' and 'foldl'. Applies a 735 -- function to each element of a 'Text', passing an accumulating 736 -- parameter from left to right, and returns a final 'Text'. 737 mapAccumL :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text) 738 -- entered 7768 timesmapAccumL f s t = case uncons t of 739 Nothing -> (s, empty) 740 Just (x, xs) -> (s'', cons y ys) 741 where (s', y ) = f s x 742 (s'',ys) = mapAccumL f s' xs 743 744 -- | The 'mapAccumR' function behaves like a combination of 'map' and 745 -- 'foldr'; it applies a function to each element of a 'Text', passing 746 -- an accumulating parameter from right to left, and returning a final 747 -- value of this accumulator together with the new 'Text'. 748 mapAccumR :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text) 749 -- entered 7087 timesmapAccumR f s t = case uncons t of 750 Nothing -> (s, empty) 751 Just (x, xs) -> (s'', cons y ys) 752 where (s'',y ) = f s' x 753 (s', ys) = mapAccumR f s xs 754 755 -- ----------------------------------------------------------------------------- 756 -- ** Generating and unfolding 'Text's 757 758 -- | /O(n*m)/ 'replicate' @n@ @t@ is a 'Text' consisting of the input 759 -- @t@ repeated @n@ times. 760 replicate :: Int -> Text -> Text 761 -- entered 100 timesreplicate n t@(Text a o l) 762 | n <= 0 || l <= 0 = empty 763 | n == 1 = t 764 | isSingleton t = replicateChar n (unsafeHead t) 765 | otherwise = Text (A.run x) 0 len 766 where 767 len = l * n 768 x = do 769 arr <- A.unsafeNew len 770 let loop !d !i | i >= n = return arr 771 | otherwise = let m = d + l 772 in A.copyI arr d a o m >> loop m (i+1) 773 loop 0 0 774 {-# INLINE [1] replicate #-} 775 776 {-# RULES 777 "TEXT replicate/singleton -> replicateChar" [~1] forall n c. 778 replicate n (singleton c) = replicateChar n c 779 #-} 780 781 -- | /O(n)/ 'replicateChar' @n@ @c@ is a 'Text' of length @n@ with @c@ the 782 -- value of every element. Subject to fusion. 783 replicateChar :: Int -> Char -> Text 784 -- entered 1148 timesreplicateChar n c = unstream (S.replicateCharI n c) 785 {-# INLINE replicateChar #-} 786 787 -- | /O(n)/, where @n@ is the length of the result. The 'unfoldr' 788 -- function is analogous to the List 'L.unfoldr'. 'unfoldr' builds a 789 -- 'Text' from a seed value. The function takes the element and 790 -- returns 'Nothing' if it is done producing the 'Text', otherwise 791 -- 'Just' @(a,b)@. In this case, @a@ is the next 'Char' in the 792 -- string, and @b@ is the seed value for further production. Subject 793 -- to fusion. 794 unfoldr :: (a -> Maybe (Char,a)) -> a -> Text 795 -- entered 100 timesunfoldr f s = unstream (S.unfoldr f s) 796 {-# INLINE unfoldr #-} 797 798 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Text' from a seed 799 -- value. However, the length of the result should be limited by the 800 -- first argument to 'unfoldrN'. This function is more efficient than 801 -- 'unfoldr' when the maximum length of the result is known and 802 -- correct, otherwise its performance is similar to 'unfoldr'. Subject 803 -- to fusion. 804 unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Text 805 -- entered 100 timesunfoldrN n f s = unstream (S.unfoldrN n f s) 806 {-# INLINE unfoldrN #-} 807 808 -- ----------------------------------------------------------------------------- 809 -- * Substrings 810 811 -- | /O(n)/ 'take' @n@, applied to a 'Text', returns the prefix of the 812 -- 'Text' of length @n@, or the 'Text' itself if @n@ is greater than 813 -- the length of the Text. Subject to fusion. 814 take :: Int -> Text -> Text 815 -- entered 2578 timestake n t@(Text arr off len) 816 | n <= 0 = empty 817 | n >= len = t 818 | otherwise = Text arr off (loop 0 0) 819 where 820 loop !i !cnt 821 | i >= len || cnt >= n = i 822 | otherwise = loop (i+d) (cnt+1) 823 where d = iter_ t i 824 {-# INLINE [1] take #-} 825 826 {-# RULES 827 "TEXT take -> fused" [~1] forall n t. 828 take n t = unstream (S.take n (stream t)) 829 "TEXT take -> unfused" [1] forall n t. 830 unstream (S.take n (stream t)) = take n t 831 #-} 832 833 -- | /O(n)/ 'drop' @n@, applied to a 'Text', returns the suffix of the 834 -- 'Text' after the first @n@ characters, or the empty 'Text' if @n@ 835 -- is greater than the length of the 'Text'. Subject to fusion. 836 drop :: Int -> Text -> Text 837 -- entered 3094 timesdrop n t@(Text arr off len) 838 | n <= 0 = t 839 | n >= len = empty 840 | otherwise = loop 0 0 841 where loop !i !cnt 842 | i >= len || cnt >= n = Text arr (off+i) (len-i) 843 | otherwise = loop (i+d) (cnt+1) 844 where d = iter_ t i 845 {-# INLINE [1] drop #-} 846 847 {-# RULES 848 "TEXT drop -> fused" [~1] forall n t. 849 drop n t = unstream (S.drop n (stream t)) 850 "TEXT drop -> unfused" [1] forall n t. 851 unstream (S.drop n (stream t)) = drop n t 852 #-} 853 854 -- | /O(n)/ 'takeWhile', applied to a predicate @p@ and a 'Text', 855 -- returns the longest prefix (possibly empty) of elements that 856 -- satisfy @p@. Subject to fusion. 857 takeWhile :: (Char -> Bool) -> Text -> Text 858 -- entered 400 timestakeWhile p t@(Text arr off len) = loop 0 859 where loop !i | i >= len = t 860 | p c = loop (i+d) 861 | otherwise = textP arr off i 862 where Iter c d = iter t i 863 {-# INLINE [1] takeWhile #-} 864 865 {-# RULES 866 "TEXT takeWhile -> fused" [~1] forall p t. 867 takeWhile p t = unstream (S.takeWhile p (stream t)) 868 "TEXT takeWhile -> unfused" [1] forall p t. 869 unstream (S.takeWhile p (stream t)) = takeWhile p t 870 #-} 871 872 -- | /O(n)/ 'dropWhile' @p@ @t@ returns the suffix remaining after 873 -- 'takeWhile' @p@ @t@. Subject to fusion. 874 dropWhile :: (Char -> Bool) -> Text -> Text 875 -- entered 1200 timesdropWhile p t@(Text arr off len) = loop 0 0 876 where loop !i !l | l >= len = empty 877 | p c = loop (i+d) (l+d) 878 | otherwise = Text arr (off+i) (len-l) 879 where Iter c d = iter t i 880 {-# INLINE [1] dropWhile #-} 881 882 {-# RULES 883 "TEXT dropWhile -> fused" [~1] forall p t. 884 dropWhile p t = unstream (S.dropWhile p (stream t)) 885 "TEXT dropWhile -> unfused" [1] forall p t. 886 unstream (S.dropWhile p (stream t)) = dropWhile p t 887 #-} 888 889 -- | /O(n)/ 'dropWhileEnd' @p@ @t@ returns the prefix remaining after 890 -- dropping characters that fail the predicate @p@ from the end of 891 -- @t@. Subject to fusion. 892 -- Examples: 893 -- 894 -- > dropWhileEnd (=='.') "foo..." == "foo" 895 dropWhileEnd :: (Char -> Bool) -> Text -> Text 896 -- entered 2284 timesdropWhileEnd p t@(Text arr off len) = loop (len-1) len 897 where loop !i !l | l <= 0 = empty 898 | p c = loop (i+d) (l+d) 899 | otherwise = Text arr off l 900 where (c,d) = reverseIter t i 901 {-# INLINE [1] dropWhileEnd #-} 902 903 {-# RULES 904 "TEXT dropWhileEnd -> fused" [~1] forall p t. 905 dropWhileEnd p t = S.reverse (S.dropWhile p (S.reverseStream t)) 906 "TEXT dropWhileEnd -> unfused" [1] forall p t. 907 S.reverse (S.dropWhile p (S.reverseStream t)) = dropWhileEnd p t 908 #-} 909 910 -- | /O(n)/ 'dropAround' @p@ @t@ returns the substring remaining after 911 -- dropping characters that fail the predicate @p@ from both the 912 -- beginning and end of @t@. Subject to fusion. 913 dropAround :: (Char -> Bool) -> Text -> Text 914 -- entered 102 timesdropAround p = dropWhile p . dropWhileEnd p 915 {-# INLINE [1] dropAround #-} 916 917 -- | /O(n)/ Remove leading white space from a string. Equivalent to: 918 -- 919 -- > dropWhile isSpace 920 stripStart :: Text -> Text 921 -- entered oncestripStart = dropWhile isSpace 922 {-# INLINE [1] stripStart #-} 923 924 -- | /O(n)/ Remove trailing white space from a string. Equivalent to: 925 -- 926 -- > dropWhileEnd isSpace 927 stripEnd :: Text -> Text 928 -- entered oncestripEnd = dropWhileEnd isSpace 929 {-# INLINE [1] stripEnd #-} 930 931 -- | /O(n)/ Remove leading and trailing white space from a string. 932 -- Equivalent to: 933 -- 934 -- > dropAround isSpace 935 strip :: Text -> Text 936 -- entered oncestrip = dropAround isSpace 937 {-# INLINE [1] strip #-} 938 939 -- | /O(n)/ 'splitAt' @n t@ returns a pair whose first element is a 940 -- prefix of @t@ of length @n@, and whose second is the remainder of 941 -- the string. It is equivalent to @('take' n t, 'drop' n t)@. 942 splitAt :: Int -> Text -> (Text, Text) 943 -- entered 14,515 timessplitAt n t@(Text arr off len) 944 | n <= 0 = (empty, t) 945 | n >= len = (t, empty) 946 | otherwise = (Text arr off k, Text arr (off+k) (len-k)) 947 where k = loop 0 0 948 loop !i !cnt 949 | i >= len || cnt >= n = i 950 | otherwise = loop (i+d) (cnt+1) 951 where d = iter_ t i 952 {-# INLINE splitAt #-} 953 954 -- | /O(n)/ 'spanBy', applied to a predicate @p@ and text @t@, returns 955 -- a pair whose first element is the longest prefix (possibly empty) 956 -- of @t@ of elements that satisfy @p@, and whose second is the 957 -- remainder of the list. 958 spanBy :: (Char -> Bool) -> Text -> (Text, Text) 959 -- entered 3100 timesspanBy p t@(Text arr off len) = (textP arr off k, textP arr (off+k) (len-k)) 960 where k = loop 0 961 loop !i | i >= len || not (p c) = i 962 | otherwise = loop (i+d) 963 where Iter c d = iter t i 964 {-# INLINE spanBy #-} 965 966 -- | /O(n)/ 'breakBy' is like 'spanBy', but the prefix returned is 967 -- over elements that fail the predicate @p@. 968 breakBy :: (Char -> Bool) -> Text -> (Text, Text) 969 -- entered 1984 timesbreakBy p = spanBy (not . p) 970 {-# INLINE breakBy #-} 971 972 -- | /O(n)/ Group characters in a string according to a predicate. 973 groupBy :: (Char -> Char -> Bool) -> Text -> [Text] 974 -- entered 800 timesgroupBy p = loop 975 where 976 loop t@(Text arr off len) 977 | null t = [] 978 | otherwise = text arr off n : loop (text arr (off+n) (len-n)) 979 where Iter c d = iter t 0 980 n = d + findAIndexOrEnd (not . p c) (Text arr (off+d) (len-d)) 981 982 -- | Returns the /array/ index (in units of 'Word16') at which a 983 -- character may be found. This is /not/ the same as the logical 984 -- index returned by e.g. 'findIndex'. 985 findAIndexOrEnd :: (Char -> Bool) -> Text -> Int 986 -- entered 11,443 timesfindAIndexOrEnd q t@(Text _arr _off len) = go 0 987 where go !i | i >= len || q c = i 988 | otherwise = go (i+d) 989 where Iter c d = iter t i 990 991 -- | /O(n)/ Group characters in a string by equality. 992 group :: Text -> [Text] 993 -- entered oncegroup = groupBy (==) 994 995 -- | /O(n)/ Return all initial segments of the given 'Text', shortest 996 -- first. 997 inits :: Text -> [Text] 998 -- entered 952 timesinits t@(Text arr off len) = loop 0 999 where loop i | i >= len = [t] 1000 | otherwise = Text arr off i : loop (i + iter_ t i) 1001 1002 -- | /O(n)/ Return all final segments of the given 'Text', longest 1003 -- first. 1004 tails :: Text -> [Text] 1005 -- entered 400 timestails t | null t = [empty] 1006 | otherwise = t : tails (unsafeTail t) 1007 1008 -- $split 1009 -- 1010 -- Splitting functions in this library do not perform character-wise 1011 -- copies to create substrings; they just construct new 'Text's that 1012 -- are slices of the original. 1013 1014 -- | /O(m+n)/ Break a 'Text' into pieces separated by the first 1015 -- 'Text' argument, consuming the delimiter. An empty delimiter is 1016 -- invalid, and will cause an error to be raised. 1017 -- 1018 -- Examples: 1019 -- 1020 -- > split "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"] 1021 -- > split "aaa" "aaaXaaaXaaaXaaa" == ["","X","X","X",""] 1022 -- > split "x" "x" == ["",""] 1023 -- 1024 -- and 1025 -- 1026 -- > intercalate s . split s == id 1027 -- > split (singleton c) == splitBy (==c) 1028 -- 1029 -- In (unlikely) bad cases, this function's time complexity degrades 1030 -- towards /O(n*m)/. 1031 split :: Text -> Text -> [Text] 1032 -- entered 900 timessplit pat@(Text _ _ l) src@(Text arr off len) 1033 | l <= 0 = emptyError "split" 1034 | isSingleton pat = splitBy (== unsafeHead pat) src 1035 | otherwise = go 0 (indices pat src) 1036 where 1037 go !s (x:xs) = textP arr (s+off) (x-s) : go (x+l) xs 1038 go s _ = [textP arr (s+off) (len-s)] 1039 {-# INLINE [1] split #-} 1040 1041 {-# RULES 1042 "TEXT split/singleton -> splitBy/==" [~1] forall c t. 1043 split (singleton c) t = splitBy (==c) t 1044 #-} 1045 1046 -- | /O(n)/ Splits a 'Text' into components delimited by separators, 1047 -- where the predicate returns True for a separator element. The 1048 -- resulting components do not contain the separators. Two adjacent 1049 -- separators result in an empty component in the output. eg. 1050 -- 1051 -- > splitBy (=='a') "aabbaca" == ["","","bb","c",""] 1052 -- > splitBy (=='a') "" == [""] 1053 splitBy :: (Char -> Bool) -> Text -> [Text] 1054 -- entered 1488 timessplitBy _ t@(Text _off _arr 0) = [t] 1055 splitBy p t = loop t 1056 where loop s | null s' = [l] 1057 | otherwise = l : loop (unsafeTail s') 1058 where (l, s') = breakBy p s 1059 {-# INLINE splitBy #-} 1060 1061 -- | /O(n)/ Splits a 'Text' into components of length @k@. The last 1062 -- element may be shorter than the other chunks, depending on the 1063 -- length of the input. Examples: 1064 -- 1065 -- > chunksOf 3 "foobarbaz" == ["foo","bar","baz"] 1066 -- > chunksOf 4 "haskell.org" == ["hask","ell.","org"] 1067 chunksOf :: Int -> Text -> [Text] 1068 -- entered 300 timeschunksOf k = go 1069 where 1070 go t = case splitAt k t of 1071 (a,b) | null a -> [] 1072 | otherwise -> a : go b 1073 {-# INLINE chunksOf #-} 1074 1075 -- ---------------------------------------------------------------------------- 1076 -- * Searching 1077 1078 ------------------------------------------------------------------------------- 1079 -- ** Searching with a predicate 1080 1081 -- | /O(n)/ The 'findBy' function takes a predicate and a 'Text', and 1082 -- returns the first element in matching the predicate, or 'Nothing' 1083 -- if there is no such element. 1084 findBy :: (Char -> Bool) -> Text -> Maybe Char 1085 -- entered 400 timesfindBy p t = S.findBy p (stream t) 1086 {-# INLINE findBy #-} 1087 1088 -- | /O(n)/ The 'partitionBy' function takes a predicate and a 'Text', 1089 -- and returns the pair of 'Text's with elements which do and do not 1090 -- satisfy the predicate, respectively; i.e. 1091 -- 1092 -- > partitionBy p t == (filter p t, filter (not . p) t) 1093 partitionBy :: (Char -> Bool) -> Text -> (Text, Text) 1094 -- entered 400 timespartitionBy p t = (filter p t, filter (not . p) t) 1095 {-# INLINE partitionBy #-} 1096 1097 -- | /O(n)/ 'filter', applied to a predicate and a 'Text', 1098 -- returns a 'Text' containing those characters that satisfy the 1099 -- predicate. 1100 filter :: (Char -> Bool) -> Text -> Text 1101 -- entered 4315 timesfilter p t = unstream (S.filter p (stream t)) 1102 {-# INLINE filter #-} 1103 1104 -- | /O(n+m)/ Find the first instance of @needle@ (which must be 1105 -- non-'null') in @haystack@. The first element of the returned tuple 1106 -- is the prefix of @haystack@ before @needle@ is matched. The second 1107 -- is the remainder of @haystack@, starting with the match. 1108 -- 1109 -- Examples: 1110 -- 1111 -- > break "::" "a::b::c" ==> ("a", "::b::c") 1112 -- > break "/" "foobar" ==> ("foobar", "") 1113 -- 1114 -- Laws: 1115 -- 1116 -- > append prefix match == haystack 1117 -- > where (prefix, match) = break needle haystack 1118 -- 1119 -- If you need to break a string by a substring repeatedly (e.g. you 1120 -- want to break on every instance of a substring), use 'find' 1121 -- instead, as it has lower startup overhead. 1122 -- 1123 -- In (unlikely) bad cases, this function's time complexity degrades 1124 -- towards /O(n*m)/. 1125 break :: Text -> Text -> (Text, Text) 1126 -- entered 300 timesbreak pat src@(Text arr off len) 1127 | null pat = emptyError "break" 1128 | otherwise = case indices pat src of 1129 [] -> (src, empty) 1130 (x:_) -> (textP arr off x, textP arr (off+x) (len-x)) 1131 {-# INLINE break #-} 1132 1133 -- | /O(n+m)/ Similar to 'break', but searches from the end of the string. 1134 -- 1135 -- The first element of the returned tuple is the prefix of @haystack@ 1136 -- up to and including the last match of @needle@. The second is the 1137 -- remainder of @haystack@, following the match. 1138 -- 1139 -- > breakEnd "::" "a::b::c" ==> ("a::b::", "c") 1140 breakEnd :: Text -> Text -> (Text, Text) 1141 -- entered 100 timesbreakEnd pat src = let (a,b) = break (reverse pat) (reverse src) 1142 in (reverse b, reverse a) 1143 {-# INLINE breakEnd #-} 1144 1145 -- | /O(n+m)/ Find all non-overlapping instances of @needle@ in 1146 -- @haystack@. Each element of the returned list consists of a pair: 1147 -- 1148 -- * The entire string prior to the /k/th match (i.e. the prefix) 1149 -- 1150 -- * The /k/th match, followed by the remainder of the string 1151 -- 1152 -- Examples: 1153 -- 1154 -- > find "::" "" 1155 -- > ==> [] 1156 -- > find "/" "a/b/c/" 1157 -- > ==> [("a", "/b/c/"), ("a/b", "/c/"), ("a/b/c", "/")] 1158 -- 1159 -- In (unlikely) bad cases, this function's time complexity degrades 1160 -- towards /O(n*m)/. 1161 -- 1162 -- The @needle@ parameter may not be empty. 1163 find :: Text -- ^ @needle@ to search for 1164 -> Text -- ^ @haystack@ in which to search 1165 -> [(Text, Text)] 1166 -- entered 300 timesfind pat src@(Text arr off slen) 1167 | null pat = emptyError "find" 1168 | otherwise = L.map step (indices pat src) 1169 where 1170 step x = (chunk 0 x, chunk x (slen-x)) 1171 chunk !n !l = textP arr (n+off) l 1172 {-# INLINE find #-} 1173 1174 ------------------------------------------------------------------------------- 1175 -- ** Indexing 'Text's 1176 1177 -- $index 1178 -- 1179 -- If you think of a 'Text' value as an array of 'Char' values (which 1180 -- it is not), you run the risk of writing inefficient code. 1181 -- 1182 -- An idiom that is common in some languages is to find the numeric 1183 -- offset of a character or substring, then use that number to split 1184 -- or trim the searched string. With a 'Text' value, this approach 1185 -- would require two /O(n)/ operations: one to perform the search, and 1186 -- one to operate from wherever the search ended. 1187 -- 1188 -- For example, suppose you have a string that you want to split on 1189 -- the substring @\"::\"@, such as @\"foo::bar::quux\"@. Instead of 1190 -- searching for the index of @\"::\"@ and taking the substrings 1191 -- before and after that index, you would instead use @find \"::\"@. 1192 1193 -- | /O(n)/ 'Text' index (subscript) operator, starting from 0. 1194 index :: Text -> Int -> Char 1195 -- entered 100 timesindex t n = S.index (stream t) n 1196 {-# INLINE index #-} 1197 1198 -- | /O(n)/ The 'findIndex' function takes a predicate and a 'Text' 1199 -- and returns the index of the first element in the 'Text' satisfying 1200 -- the predicate. Subject to fusion. 1201 findIndex :: (Char -> Bool) -> Text -> Maybe Int 1202 -- entered 12,673 timesfindIndex p t = S.findIndex p (stream t) 1203 {-# INLINE findIndex #-} 1204 1205 -- | /O(n+m)/ The 'count' function returns the number of times the 1206 -- query string appears in the given 'Text'. An empty query string is 1207 -- invalid, and will cause an error to be raised. 1208 -- 1209 -- In (unlikely) bad cases, this function's time complexity degrades 1210 -- towards /O(n*m)/. 1211 count :: Text -> Text -> Int 1212 -- entered 300 timescount pat src 1213 | null pat = emptyError "count" 1214 | isSingleton pat = countChar (unsafeHead pat) src 1215 | otherwise = L.length (indices pat src) 1216 {-# INLINE [1] count #-} 1217 1218 {-# RULES 1219 "TEXT count/singleton -> countChar" [~1] forall c t. 1220 count (singleton c) t = countChar c t 1221 #-} 1222 1223 -- | /O(n)/ The 'countChar' function returns the number of times the 1224 -- query element appears in the given 'Text'. Subject to fusion. 1225 countChar :: Char -> Text -> Int 1226 -- entered 107 timescountChar c t = S.countChar c (stream t) 1227 {-# INLINE countChar #-} 1228 1229 ------------------------------------------------------------------------------- 1230 -- * Zipping 1231 1232 -- | /O(n)/ 'zip' takes two 'Text's and returns a list of 1233 -- corresponding pairs of bytes. If one input 'Text' is short, 1234 -- excess elements of the longer 'Text' are discarded. This is 1235 -- equivalent to a pair of 'unpack' operations. 1236 zip :: Text -> Text -> [(Char,Char)] 1237 -- entered 400 timeszip a b = S.unstreamList $ S.zipWith (,) (stream a) (stream b) 1238 {-# INLINE [0] zip #-} 1239 1240 -- | /O(n)/ 'zipWith' generalises 'zip' by zipping with the function 1241 -- given as the first argument, instead of a tupling function. 1242 zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text 1243 -- entered 400 timeszipWith f t1 t2 = unstream (S.zipWith f (stream t1) (stream t2)) 1244 {-# INLINE [0] zipWith #-} 1245 1246 -- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's 1247 -- representing white space. 1248 words :: Text -> [Text] 1249 -- entered 400 timeswords t@(Text arr off len) = loop 0 0 1250 where 1251 loop !start !n 1252 | n >= len = if start == n 1253 then [] 1254 else [Text arr (start+off) (n-start)] 1255 | isSpace c = 1256 if start == n 1257 then loop (start+1) (start+1) 1258 else Text arr (start+off) (n-start) : loop (n+d) (n+d) 1259 | otherwise = loop start (n+d) 1260 where Iter c d = iter t n 1261 {-# INLINE words #-} 1262 1263 -- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at 1264 -- newline 'Char's. The resulting strings do not contain newlines. 1265 lines :: Text -> [Text] 1266 -- entered 457 timeslines ps | null ps = [] 1267 | otherwise = h : if null t 1268 then [] 1269 else lines (unsafeTail t) 1270 where (h,t) = spanBy (/= '\n') ps 1271 {-# INLINE lines #-} 1272 1273 {- 1274 -- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line 1275 -- boundaries. 1276 -- 1277 -- A line boundary is considered to be either a line feed, a carriage 1278 -- return immediately followed by a line feed, or a carriage return. 1279 -- This accounts for both Unix and Windows line ending conventions, 1280 -- and for the old convention used on Mac OS 9 and earlier. 1281 lines' :: Text -> [Text] 1282 lines' ps | null ps = [] 1283 | otherwise = h : case uncons t of 1284 Nothing -> [] 1285 Just (c,t') 1286 | c == '\n' -> lines t' 1287 | c == '\r' -> case uncons t' of 1288 Just ('\n',t'') -> lines t'' 1289 _ -> lines t' 1290 where (h,t) = span notEOL ps 1291 notEOL c = c /= '\n' && c /= '\r' 1292 {-# INLINE lines' #-} 1293 -} 1294 1295 -- | /O(n)/ Joins lines, after appending a terminating newline to 1296 -- each. 1297 unlines :: [Text] -> Text 1298 -- entered 101 timesunlines = concat . L.map (`snoc` '\n') 1299 {-# INLINE unlines #-} 1300 1301 -- | /O(n)/ Joins words using single space characters. 1302 unwords :: [Text] -> Text 1303 -- entered 100 timesunwords = intercalate (singleton ' ') 1304 {-# INLINE unwords #-} 1305 1306 -- | /O(n)/ The 'isPrefixOf' function takes two 'Text's and returns 1307 -- 'True' iff the first is a prefix of the second. This function is 1308 -- subject to fusion. 1309 isPrefixOf :: Text -> Text -> Bool 1310 -- entered 2575 timesisPrefixOf a@(Text _ _ alen) b@(Text _ _ blen) = 1311 alen <= blen && S.isPrefixOf (stream a) (stream b) 1312 {-# INLINE [1] isPrefixOf #-} 1313 1314 {-# RULES 1315 "TEXT isPrefixOf -> fused" [~1] forall s t. 1316 isPrefixOf s t = S.isPrefixOf (stream s) (stream t) 1317 "TEXT isPrefixOf -> unfused" [1] forall s t. 1318 S.isPrefixOf (stream s) (stream t) = isPrefixOf s t 1319 #-} 1320 1321 -- | /O(n)/ The 'isSuffixOf' function takes two 'Text's and returns 1322 -- 'True' iff the first is a suffix of the second. 1323 isSuffixOf :: Text -> Text -> Bool 1324 -- entered 900 timesisSuffixOf a@(Text _aarr _aoff alen) b@(Text barr boff blen) = 1325 d >= 0 && a == b' 1326 where d = blen - alen 1327 b' | d == 0 = b 1328 | otherwise = Text barr (boff+d) alen 1329 {-# INLINE isSuffixOf #-} 1330 1331 -- | /O(n+m)/ The 'isInfixOf' function takes two 'Text's and returns 1332 -- 'True' iff the first is contained, wholly and intact, anywhere 1333 -- within the second. 1334 -- 1335 -- In (unlikely) bad cases, this function's time complexity degrades 1336 -- towards /O(n*m)/. 1337 isInfixOf :: Text -> Text -> Bool 1338 -- entered 400 timesisInfixOf needle haystack 1339 | null needle = True 1340 | isSingleton needle = S.elem (unsafeHead needle) . S.stream $ haystack 1341 | otherwise = not . L.null . indices needle $ haystack 1342 {-# INLINE [1] isInfixOf #-} 1343 1344 {-# RULES 1345 "TEXT isInfixOf/singleton -> S.elem/S.stream" [~1] forall n h. 1346 isInfixOf (singleton n) h = S.elem n (S.stream h) 1347 #-} 1348 1349 ------------------------------------------------------------------------------- 1350 -- * View patterns 1351 1352 -- | /O(n)/ Returns the suffix of the second string if its prefix 1353 -- matches the first. 1354 -- 1355 -- Examples: 1356 -- 1357 -- > prefixed "foo" "foobar" == Just "bar" 1358 -- > prefixed "foo" "quux" == Nothing 1359 -- 1360 -- This is particularly useful with the @ViewPatterns@ extension to 1361 -- GHC, as follows: 1362 -- 1363 -- > {-# LANGUAGE ViewPatterns #-} 1364 -- > import Data.Text as T 1365 -- > 1366 -- > fnordLength :: Text -> Int 1367 -- > fnordLength (prefixed "fnord" -> Just suf) = T.length suf 1368 -- > fnordLength _ = -1 1369 prefixed :: Text -> Text -> Maybe Text 1370 -- entered 400 timesprefixed p@(Text _arr _off plen) t@(Text arr off len) 1371 | p `isPrefixOf` t = Just $! textP arr (off+plen) (len-plen) 1372 | otherwise = Nothing 1373 1374 -- | /O(n)/ Returns the prefix of the second string if its suffix 1375 -- matches the first. 1376 -- 1377 -- Examples: 1378 -- 1379 -- > suffixed "bar" "foobar" == Just "foo" 1380 -- > suffixed "foo" "quux" == Nothing 1381 -- 1382 -- This is particularly useful with the @ViewPatterns@ extension to 1383 -- GHC, as follows: 1384 -- 1385 -- > {-# LANGUAGE ViewPatterns #-} 1386 -- > import Data.Text as T 1387 -- > 1388 -- > quuxLength :: Text -> Int 1389 -- > quuxLength (suffixed "quux" -> Just pre) = T.length pre 1390 -- > quuxLength _ = -1 1391 suffixed :: Text -> Text -> Maybe Text 1392 -- entered 400 timessuffixed p@(Text _arr _off plen) t@(Text arr off len) 1393 | p `isSuffixOf` t = Just $! textP arr off (len-plen) 1394 | otherwise = Nothing 1395 1396 emptyError :: String -> a 1397 -- entered 63 timesemptyError fun = P.error ("Data.Text." ++ fun ++ ": empty input")