1 {-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
    2 {-# OPTIONS_GHC -fno-warn-orphans #-}
    3 
    4 -- |
    5 -- Module      : Data.Text
    6 -- Copyright   : (c) 2008, 2009 Tom Harper,
    7 --               (c) 2009, 2010 Bryan O'Sullivan,
    8 --               (c) 2009 Duncan Coutts
    9 --
   10 -- License     : BSD-style
   11 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
   12 --               duncan@haskell.org
   13 -- Stability   : experimental
   14 -- Portability : GHC
   15 --
   16 -- A time and space-efficient implementation of Unicode text using
   17 -- packed Word16 arrays.  Suitable for performance critical use, both
   18 -- in terms of large data quantities and high speed.
   19 --
   20 -- This module is intended to be imported @qualified@, to avoid name
   21 -- clashes with "Prelude" functions, e.g.
   22 --
   23 -- > import qualified Data.Text as T
   24 
   25 module Data.Text
   26     (
   27     -- * Fusion
   28     -- $fusion
   29 
   30     -- * Types
   31       Text
   32 
   33     -- * Creation and elimination
   34     , pack
   35     , unpack
   36     , singleton
   37     , empty
   38 
   39     -- * Basic interface
   40     , cons
   41     , snoc
   42     , append
   43     , uncons
   44     , head
   45     , last
   46     , tail
   47     , init
   48     , null
   49     , length
   50     , compareLength
   51 
   52     -- * Transformations
   53     , map
   54     , intercalate
   55     , intersperse
   56     , transpose
   57     , reverse
   58     , replace
   59 
   60     -- ** Case conversion
   61     -- $case
   62     , toCaseFold
   63     , toLower
   64     , toUpper
   65 
   66     -- ** Justification
   67     , justifyLeft
   68     , justifyRight
   69     , center
   70 
   71     -- * Folds
   72     , foldl
   73     , foldl'
   74     , foldl1
   75     , foldl1'
   76     , foldr
   77     , foldr1
   78 
   79     -- ** Special folds
   80     , concat
   81     , concatMap
   82     , any
   83     , all
   84     , maximum
   85     , minimum
   86 
   87     -- * Construction
   88 
   89     -- ** Scans
   90     , scanl
   91     , scanl1
   92     , scanr
   93     , scanr1
   94 
   95     -- ** Accumulating maps
   96     , mapAccumL
   97     , mapAccumR
   98 
   99     -- ** Generation and unfolding
  100     , replicate
  101     , unfoldr
  102     , unfoldrN
  103 
  104     -- * Substrings
  105 
  106     -- ** Breaking strings
  107     , take
  108     , drop
  109     , takeWhile
  110     , dropWhile
  111     , dropWhileEnd
  112     , dropAround
  113     , strip
  114     , stripStart
  115     , stripEnd
  116     , splitAt
  117     , spanBy
  118     , break
  119     , breakEnd
  120     , breakBy
  121     , group
  122     , groupBy
  123     , inits
  124     , tails
  125 
  126     -- ** Breaking into many substrings
  127     -- $split
  128     , split
  129     , splitBy
  130     , chunksOf
  131 
  132     -- ** Breaking into lines and words
  133     , lines
  134     --, lines'
  135     , words
  136     , unlines
  137     , unwords
  138 
  139     -- * Predicates
  140     , isPrefixOf
  141     , isSuffixOf
  142     , isInfixOf
  143 
  144     -- ** View patterns
  145     , prefixed
  146     , suffixed
  147 
  148     -- * Searching
  149     , filter
  150     , find
  151     , findBy
  152     , partitionBy
  153 
  154     -- , findSubstring
  155     
  156     -- * Indexing
  157     -- $index
  158     , index
  159     , findIndex
  160     , count
  161 
  162     -- * Zipping and unzipping
  163     , zip
  164     , zipWith
  165 
  166     -- -* Ordered text
  167     -- , sort
  168     ) where
  169 
  170 import Prelude (Char, Bool(..), Functor(..), Int, Maybe(..), String,
  171                 Eq(..), Ord(..), Ordering(..), (++),
  172                 Read(..), Show(..),
  173                 (&&), (||), (+), (-), (.), ($), ($!), (>>), (*),
  174                 div, error, not, return, otherwise)
  175 #if defined(HAVE_DEEPSEQ)
  176 import Control.DeepSeq (NFData)
  177 #endif
  178 #if defined(ASSERTS)
  179 import Control.Exception (assert)
  180 #endif
  181 import Data.Char (isSpace)
  182 import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf))
  183 #if __GLASGOW_HASKELL__ >= 612
  184 import Data.Data (mkNoRepType)
  185 #else
  186 import Data.Data (mkNorepType)
  187 #endif
  188 import Control.Monad (foldM)
  189 import qualified Data.Text.Array as A
  190 import qualified Data.List as L
  191 import Data.Monoid (Monoid(..))
  192 import Data.String (IsString(..))
  193 import qualified Data.Text.Fusion as S
  194 import qualified Data.Text.Fusion.Common as S
  195 import Data.Text.Fusion (stream, reverseStream, unstream)
  196 import Data.Text.Internal (Text(..), empty, text, textP)
  197 import qualified Prelude as P
  198 import Data.Text.Unsafe (Iter(..), iter, iter_, lengthWord16, reverseIter,
  199                          unsafeHead, unsafeTail)
  200 import Data.Text.UnsafeChar (unsafeChr)
  201 import qualified Data.Text.Encoding.Utf16 as U16
  202 import Data.Text.Search (indices)
  203 
  204 -- $fusion
  205 --
  206 -- Most of the functions in this module are subject to /fusion/,
  207 -- meaning that a pipeline of such functions will usually allocate at
  208 -- most one 'Text' value.
  209 
  210 instance Eq Text where
  211     -- entered 7765 timest1 == t2 = stream t1 == stream t2
  212     {-# INLINE (==) #-}
  213 
  214 instance Ord Text where
  215     -- entered 100 timescompare t1 t2 = compare (stream t1) (stream t2)
  216     {-# INLINE compare #-}
  217 
  218 instance Show Text where
  219     -- entered 100 timesshowsPrec p ps r = showsPrec p (unpack ps) r
  220 
  221 instance Read Text where
  222     -- entered 100 timesreadsPrec p str = [(pack x,y) | (x,y) <- readsPrec p str]
  223 
  224 instance Monoid Text where
  225     -- never enteredmempty  = empty
  226     -- entered 100 timesmappend = append
  227     -- entered 100 timesmconcat = concat
  228 
  229 instance IsString Text where
  230     -- entered 401 timesfromString = pack
  231 
  232 #if defined(HAVE_DEEPSEQ)
  233 instance NFData Text
  234 #endif
  235 
  236 -- This instance preserves data abstraction at the cost of inefficiency.
  237 -- We omit reflection services for the sake of data abstraction.
  238 -- 
  239 -- This instance was created by copying the behavior of Data.Set and
  240 -- Data.Map. If you feel a mistake has been made, please feel free to
  241 -- submit improvements.
  242 --
  243 -- Original discussion is archived here:
  244 
  245 --  "could we get a Data instance for Data.Text.Text?"
  246 --  http://groups.google.com/group/haskell-cafe/browse_thread/thread/b5bbb1b28a7e525d/0639d46852575b93
  247 
  248 instance Data Text where
  249   -- never enteredgfoldl f z txt = z pack `f` (unpack txt)
  250   -- never enteredtoConstr _     = error "Data.Text.Text.toConstr"
  251   -- never enteredgunfold _ _    = error "Data.Text.Text.gunfold"
  252 #if __GLASGOW_HASKELL__ >= 612
  253   -- never entereddataTypeOf _   = mkNoRepType "Data.Text.Text"
  254 #else
  255   dataTypeOf _   = mkNorepType "Data.Text.Text"
  256 #endif
  257 
  258 -- -----------------------------------------------------------------------------
  259 -- * Conversion to/from 'Text'
  260 
  261 -- | /O(n)/ Convert a 'String' into a 'Text'.  Subject to fusion.
  262 pack :: String -> Text
  263 -- entered 52,163 timespack = unstream . S.streamList
  264 {-# INLINE [1] pack #-}
  265 
  266 -- | /O(n)/ Convert a Text into a String.  Subject to fusion.
  267 unpack :: Text -> String
  268 -- entered 21,661 timesunpack = S.unstreamList . stream
  269 {-# INLINE [1] unpack #-}
  270 
  271 -- | /O(1)/ Convert a character into a Text.
  272 -- Subject to fusion.
  273 singleton :: Char -> Text
  274 -- entered 803 timessingleton = unstream . S.singleton
  275 {-# INLINE [1] singleton #-}
  276 
  277 -- -----------------------------------------------------------------------------
  278 -- * Basic functions
  279 
  280 -- | /O(n)/ Adds a character to the front of a 'Text'.  This function
  281 -- is more costly than its 'List' counterpart because it requires
  282 -- copying a new array.  Subject to fusion.
  283 cons :: Char -> Text -> Text
  284 -- entered 14,455 timescons c t = unstream (S.cons c (stream t))
  285 {-# INLINE cons #-}
  286 
  287 -- | /O(n)/ Adds a character to the end of a 'Text'.  This copies the
  288 -- entire array in the process, unless fused.  Subject to fusion.
  289 snoc :: Text -> Char -> Text
  290 -- entered 5340 timessnoc t c = unstream (S.snoc (stream t) c)
  291 {-# INLINE snoc #-}
  292 
  293 -- | /O(n)/ Appends one 'Text' to the other by copying both of them
  294 -- into a new 'Text'.  Subject to fusion.
  295 append :: Text -> Text -> Text
  296 -- entered 2355 timesappend (Text arr1 off1 len1) (Text arr2 off2 len2) = Text (A.run x) 0 len
  297     where
  298       len = len1+len2
  299       x = do
  300         arr <- A.unsafeNew len
  301         A.copyI arr 0 arr1 off1 len1
  302         A.copyI arr len1 arr2 off2 (len1+len2)
  303         return arr
  304 {-# INLINE append #-}
  305 
  306 {-# RULES
  307 "TEXT append -> fused" [~1] forall t1 t2.
  308     append t1 t2 = unstream (S.append (stream t1) (stream t2))
  309 "TEXT append -> unfused" [1] forall t1 t2.
  310     unstream (S.append (stream t1) (stream t2)) = append t1 t2
  311  #-}
  312 
  313 -- | /O(1)/ Returns the first character of a 'Text', which must be
  314 -- non-empty.  Subject to fusion.
  315 head :: Text -> Char
  316 -- entered 400 timeshead t = S.head (stream t)
  317 {-# INLINE head #-}
  318 
  319 -- | /O(1)/ Returns the first character and rest of a 'Text', or
  320 -- 'Nothing' if empty. Subject to fusion.
  321 uncons :: Text -> Maybe (Char, Text)
  322 -- entered 400 timesuncons t@(Text arr off len)
  323     | len <= 0  = Nothing
  324     | otherwise = Just (c, textP arr (off+d) (len-d))
  325     where Iter c d = iter t 0
  326 {-# INLINE [1] uncons #-}
  327 
  328 -- | Lifted from Control.Arrow and specialized.
  329 second :: (b -> c) -> (a,b) -> (a,c)
  330 -- entered 14,550 timessecond f (a, b) = (a, f b)
  331 
  332 {-# RULES
  333 "TEXT uncons -> fused" [~1] forall t.
  334     uncons t = fmap (second unstream) (S.uncons (stream t))
  335 "TEXT uncons -> unfused" [1] forall t.
  336     fmap (second unstream) (S.uncons (stream t)) = uncons t
  337   #-}
  338 
  339 -- | /O(1)/ Returns the last character of a 'Text', which must be
  340 -- non-empty.  Subject to fusion.
  341 last :: Text -> Char
  342 -- entered 1507 timeslast (Text arr off len)
  343     | len <= 0                 = emptyError "last"
  344     | n < 0xDC00 || n > 0xDFFF = unsafeChr n
  345     | otherwise                = U16.chr2 n0 n
  346     where n  = A.unsafeIndex arr (off+len-1)
  347           n0 = A.unsafeIndex arr (off+len-2)
  348 {-# INLINE [1] last #-}
  349 
  350 {-# RULES
  351 "TEXT last -> fused" [~1] forall t.
  352     last t = S.last (stream t)
  353 "TEXT last -> unfused" [1] forall t.
  354     S.last (stream t) = last t
  355   #-}
  356 
  357 -- | /O(1)/ Returns all characters after the head of a 'Text', which
  358 -- must be non-empty.  Subject to fusion.
  359 tail :: Text -> Text
  360 -- entered 758 timestail t@(Text arr off len)
  361     | len <= 0  = emptyError "tail"
  362     | otherwise = textP arr (off+d) (len-d)
  363     where d = iter_ t 0
  364 {-# INLINE [1] tail #-}
  365 
  366 {-# RULES
  367 "TEXT tail -> fused" [~1] forall t.
  368     tail t = unstream (S.tail (stream t))
  369 "TEXT tail -> unfused" [1] forall t.
  370     unstream (S.tail (stream t)) = tail t
  371  #-}
  372 
  373 -- | /O(1)/ Returns all but the last character of a 'Text', which must
  374 -- be non-empty.  Subject to fusion.
  375 init :: Text -> Text
  376 -- entered 1138 timesinit (Text arr off len) | len <= 0                   = emptyError "init"
  377                         | n >= 0xDC00 && n <= 0xDFFF = textP arr off (len-2)
  378                         | otherwise                  = textP arr off (len-1)
  379     where
  380       n = A.unsafeIndex arr (off+len-1)
  381 {-# INLINE [1] init #-}
  382 
  383 {-# RULES
  384 "TEXT init -> fused" [~1] forall t.
  385     init t = unstream (S.init (stream t))
  386 "TEXT init -> unfused" [1] forall t.
  387     unstream (S.init (stream t)) = init t
  388  #-}
  389 
  390 -- | /O(1)/ Tests whether a 'Text' is empty or not.  Subject to
  391 -- fusion.
  392 null :: Text -> Bool
  393 -- entered 63,523 timesnull (Text _arr _off len) =
  394 #if defined(ASSERTS)
  395     assert (len >= 0) $
  396 #endif
  397     len <= 0
  398 {-# INLINE [1] null #-}
  399 
  400 {-# RULES
  401 "TEXT null -> fused" [~1] forall t.
  402     null t = S.null (stream t)
  403 "TEXT null -> unfused" [1] forall t.
  404     S.null (stream t) = null t
  405  #-}
  406 
  407 -- | /O(1)/ Tests whether a 'Text' contains exactly one character.
  408 -- Subject to fusion.
  409 isSingleton :: Text -> Bool
  410 -- entered 1596 timesisSingleton = S.isSingleton . stream
  411 {-# INLINE isSingleton #-}
  412 
  413 -- | /O(n)/ Returns the number of characters in a 'Text'.
  414 -- Subject to fusion.
  415 length :: Text -> Int
  416 -- entered 37,788 timeslength t = S.length (stream t)
  417 {-# INLINE length #-}
  418 
  419 -- | /O(n)/ Compare the count of characters in a 'Text' to a number.
  420 -- Subject to fusion.
  421 --
  422 -- This function gives the same answer as comparing against the result
  423 -- of 'length', but can short circuit if the count of characters is
  424 -- greater than the number, and hence be more efficient.
  425 compareLength :: Text -> Int -> Ordering
  426 -- entered 100 timescompareLength t n = S.compareLengthI (stream t) n
  427 {-# INLINE [1] compareLength #-}
  428 
  429 {-# RULES
  430 "TEXT compareN/length -> compareLength" [~1] forall t n.
  431     compare (length t) n = compareLength t n
  432   #-}
  433 
  434 {-# RULES
  435 "TEXT ==N/length -> compareLength/==EQ" [~1] forall t n.
  436     (==) (length t) n = compareLength t n == EQ
  437   #-}
  438 
  439 {-# RULES
  440 "TEXT /=N/length -> compareLength//=EQ" [~1] forall t n.
  441     (/=) (length t) n = compareLength t n /= EQ
  442   #-}
  443 
  444 {-# RULES
  445 "TEXT <N/length -> compareLength/==LT" [~1] forall t n.
  446     (<) (length t) n = compareLength t n == LT
  447   #-}
  448 
  449 {-# RULES
  450 "TEXT <=N/length -> compareLength//=GT" [~1] forall t n.
  451     (<=) (length t) n = compareLength t n /= GT
  452   #-}
  453 
  454 {-# RULES
  455 "TEXT >N/length -> compareLength/==GT" [~1] forall t n.
  456     (>) (length t) n = compareLength t n == GT
  457   #-}
  458 
  459 {-# RULES
  460 "TEXT >=N/length -> compareLength//=LT" [~1] forall t n.
  461     (>=) (length t) n = compareLength t n /= LT
  462   #-}
  463 
  464 -- -----------------------------------------------------------------------------
  465 -- * Transformations
  466 -- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to
  467 -- each element of @t@.  Subject to fusion.
  468 map :: (Char -> Char) -> Text -> Text
  469 -- entered 500 timesmap f t = unstream (S.map f (stream t))
  470 {-# INLINE [1] map #-}
  471 
  472 -- | /O(n)/ The 'intercalate' function takes a 'Text' and a list of
  473 -- 'Text's and concatenates the list after interspersing the first
  474 -- argument between each element of the list.
  475 intercalate :: Text -> [Text] -> Text
  476 -- entered 900 timesintercalate t = concat . (L.intersperse t)
  477 {-# INLINE intercalate #-}
  478 
  479 -- | /O(n)/ The 'intersperse' function takes a character and places it
  480 -- between the characters of a 'Text'.  Subject to fusion.
  481 intersperse     :: Char -> Text -> Text
  482 -- entered 400 timesintersperse c t = unstream (S.intersperse c (stream t))
  483 {-# INLINE intersperse #-}
  484 
  485 -- | /O(n)/ Reverse the characters of a string. Subject to fusion.
  486 reverse :: Text -> Text
  487 -- entered 4385 timesreverse t = S.reverse (stream t)
  488 {-# INLINE reverse #-}
  489 
  490 -- | /O(m+n)/ Replace every occurrence of one substring with another.
  491 replace :: Text                 -- ^ Text to search for
  492         -> Text                 -- ^ Replacement text
  493         -> Text                 -- ^ Input text
  494         -> Text
  495 -- entered 100 timesreplace s d = intercalate d . split s
  496 {-# INLINE replace #-}
  497 
  498 -- ----------------------------------------------------------------------------
  499 -- ** Case conversions (folds)
  500 
  501 -- $case
  502 --
  503 -- When case converting 'Text' values, do not use combinators like
  504 -- @map toUpper@ to case convert each character of a string
  505 -- individually, as this gives incorrect results according to the
  506 -- rules of some writing systems.  The whole-string case conversion
  507 -- functions from this module, such as @toUpper@, obey the correct
  508 -- case conversion rules.  As a result, these functions may map one
  509 -- input character to two or three output characters. For examples,
  510 -- see the documentation of each function.
  511 --
  512 -- /Note/: In some languages, case conversion is a locale- and
  513 -- context-dependent operation. The case conversion functions in this
  514 -- module are /not/ locale sensitive. Programs that require locale
  515 -- sensitivity should use appropriate versions of the case mapping
  516 -- functions from the @text-icu@ package.
  517 
  518 -- | /O(n)/ Convert a string to folded case.  This function is mainly
  519 -- useful for performing caseless (also known as case insensitive)
  520 -- string comparisons.
  521 --
  522 -- A string @x@ is a caseless match for a string @y@ if and only if:
  523 --
  524 -- @toCaseFold x == toCaseFold y@
  525 --
  526 -- The result string may be longer than the input string, and may
  527 -- differ from applying 'toLower' to the input string.  For instance,
  528 -- the Armenian small ligature \"&#xfb13;\" (men now, U+FB13) is case
  529 -- folded to the sequence \"&#x574;\" (men, U+0574) followed by
  530 -- \"&#x576;\" (now, U+0576), while the Greek \"&#xb5;\" (micro sign,
  531 -- U+00B5) is case folded to \"&#x3bc;\" (small letter mu, U+03BC)
  532 -- instead of itself.
  533 toCaseFold :: Text -> Text
  534 -- entered 100 timestoCaseFold t = unstream (S.toCaseFold (stream t))
  535 {-# INLINE [0] toCaseFold #-}
  536 
  537 -- | /O(n)/ Convert a string to lower case, using simple case
  538 -- conversion.  The result string may be longer than the input string.
  539 -- For instance, \"&#x130;\" (Latin capital letter I with dot above,
  540 -- U+0130) maps to the sequence \"i\" (Latin small letter i, U+0069) followed
  541 -- by \" &#x307;\" (combining dot above, U+0307).
  542 toLower :: Text -> Text
  543 -- entered 200 timestoLower t = unstream (S.toLower (stream t))
  544 {-# INLINE toLower #-}
  545 
  546 -- | /O(n)/ Convert a string to upper case, using simple case
  547 -- conversion.  The result string may be longer than the input string.
  548 -- For instance, the German \"&#xdf;\" (eszett, U+00DF) maps to the
  549 -- two-letter sequence \"SS\".
  550 toUpper :: Text -> Text
  551 -- entered 200 timestoUpper t = unstream (S.toUpper (stream t))
  552 {-# INLINE toUpper #-}
  553 
  554 -- | /O(n)/ Left-justify a string to the given length, using the
  555 -- specified fill character on the right. Subject to fusion. Examples:
  556 --
  557 -- > justifyLeft 7 'x' "foo"    == "fooxxxx"
  558 -- > justifyLeft 3 'x' "foobar" == "foobar"
  559 justifyLeft :: Int -> Char -> Text -> Text
  560 -- entered 400 timesjustifyLeft k c t
  561     | len >= k  = t
  562     | otherwise = t `append` replicateChar (k-len) c
  563   where len = length t
  564 {-# INLINE [1] justifyLeft #-}
  565 
  566 {-# RULES
  567 "TEXT justifyLeft -> fused" [~1] forall k c t.
  568     justifyLeft k c t = unstream (S.justifyLeftI k c (stream t))
  569 "TEXT justifyLeft -> unfused" [1] forall k c t.
  570     unstream (S.justifyLeftI k c (stream t)) = justifyLeft k c t
  571   #-}
  572 
  573 -- | /O(n)/ Right-justify a string to the given length, using the
  574 -- specified fill character on the left. Examples:
  575 --
  576 -- > justifyRight 7 'x' "bar"    == "xxxxbar"
  577 -- > justifyRight 3 'x' "foobar" == "foobar"
  578 justifyRight :: Int -> Char -> Text -> Text
  579 -- entered 400 timesjustifyRight k c t
  580     | len >= k  = t
  581     | otherwise = replicateChar (k-len) c `append` t
  582   where len = length t
  583 {-# INLINE justifyRight #-}
  584 
  585 -- | /O(n)/ Center a string to the given length, using the
  586 -- specified fill character on either side. Examples:
  587 --
  588 -- > center 8 'x' "HS" = "xxxHSxxx"
  589 center :: Int -> Char -> Text -> Text
  590 -- entered 400 timescenter k c t
  591     | len >= k  = t
  592     | otherwise = replicateChar l c `append` t `append` replicateChar r c
  593   where len = length t
  594         d   = k - len
  595         r   = d `div` 2
  596         l   = d - r
  597 {-# INLINE center #-}
  598 
  599 -- | /O(n)/ The 'transpose' function transposes the rows and columns
  600 -- of its 'Text' argument.  Note that this function uses 'pack',
  601 -- 'unpack', and the list version of transpose, and is thus not very
  602 -- efficient.
  603 transpose :: [Text] -> [Text]
  604 -- entered 100 timestranspose ts = P.map pack (L.transpose (P.map unpack ts))
  605 
  606 -- -----------------------------------------------------------------------------
  607 -- * Reducing 'Text's (folds)
  608 
  609 -- | /O(n)/ 'foldl', applied to a binary operator, a starting value
  610 -- (typically the left-identity of the operator), and a 'Text',
  611 -- reduces the 'Text' using the binary operator, from left to right.
  612 -- Subject to fusion.
  613 foldl :: (b -> Char -> b) -> b -> Text -> b
  614 -- entered 400 timesfoldl f z t = S.foldl f z (stream t)
  615 {-# INLINE foldl #-}
  616 
  617 -- | /O(n)/ A strict version of 'foldl'.  Subject to fusion.
  618 foldl' :: (b -> Char -> b) -> b -> Text -> b
  619 -- entered 400 timesfoldl' f z t = S.foldl' f z (stream t)
  620 {-# INLINE foldl' #-}
  621 
  622 -- | /O(n)/ A variant of 'foldl' that has no starting value argument,
  623 -- and thus must be applied to a non-empty 'Text'.  Subject to fusion.
  624 foldl1 :: (Char -> Char -> Char) -> Text -> Char
  625 -- entered 400 timesfoldl1 f t = S.foldl1 f (stream t)
  626 {-# INLINE foldl1 #-}
  627 
  628 -- | /O(n)/ A strict version of 'foldl1'.  Subject to fusion.
  629 foldl1' :: (Char -> Char -> Char) -> Text -> Char
  630 -- entered 400 timesfoldl1' f t = S.foldl1' f (stream t)
  631 {-# INLINE foldl1' #-}
  632 
  633 -- | /O(n)/ 'foldr', applied to a binary operator, a starting value
  634 -- (typically the right-identity of the operator), and a 'Text',
  635 -- reduces the 'Text' using the binary operator, from right to left.
  636 -- Subject to fusion.
  637 foldr :: (Char -> b -> b) -> b -> Text -> b
  638 -- entered 800 timesfoldr f z t = S.foldr f z (stream t)
  639 {-# INLINE foldr #-}
  640 
  641 -- | /O(n)/ A variant of 'foldr' that has no starting value argument,
  642 -- and thust must be applied to a non-empty 'Text'.  Subject to
  643 -- fusion.
  644 foldr1 :: (Char -> Char -> Char) -> Text -> Char
  645 -- entered 400 timesfoldr1 f t = S.foldr1 f (stream t)
  646 {-# INLINE foldr1 #-}
  647 
  648 -- -----------------------------------------------------------------------------
  649 -- ** Special folds
  650 
  651 -- | /O(n)/ Concatenate a list of 'Text's.
  652 concat :: [Text] -> Text
  653 -- entered 2488 timesconcat ts = case ts' of
  654               [] -> empty
  655               [t] -> t
  656               _ -> Text (A.run go) 0 len
  657   where
  658     ts' = L.filter (not . null) ts
  659     len = L.sum $ L.map lengthWord16 ts'
  660     go = do
  661       arr <- A.unsafeNew len
  662       let step i (Text a o l) =
  663             let !j = i + l in A.copyI arr i a o j >> return j
  664       foldM step 0 ts' >> return arr
  665 
  666 -- | /O(n)/ Map a function over a 'Text' that results in a 'Text', and
  667 -- concatenate the results.
  668 concatMap :: (Char -> Text) -> Text -> Text
  669 -- entered 100 timesconcatMap f = concat . foldr ((:) . f) []
  670 {-# INLINE concatMap #-}
  671 
  672 -- | /O(n)/ 'any' @p@ @t@ determines whether any character in the
  673 -- 'Text' @t@ satisifes the predicate @p@. Subject to fusion.
  674 any :: (Char -> Bool) -> Text -> Bool
  675 -- entered 400 timesany p t = S.any p (stream t)
  676 {-# INLINE any #-}
  677 
  678 -- | /O(n)/ 'all' @p@ @t@ determines whether all characters in the
  679 -- 'Text' @t@ satisify the predicate @p@. Subject to fusion.
  680 all :: (Char -> Bool) -> Text -> Bool
  681 -- entered 400 timesall p t = S.all p (stream t)
  682 {-# INLINE all #-}
  683 
  684 -- | /O(n)/ 'maximum' returns the maximum value from a 'Text', which
  685 -- must be non-empty. Subject to fusion.
  686 maximum :: Text -> Char
  687 -- entered 400 timesmaximum t = S.maximum (stream t)
  688 {-# INLINE maximum #-}
  689 
  690 -- | /O(n)/ 'minimum' returns the minimum value from a 'Text', which
  691 -- must be non-empty. Subject to fusion.
  692 minimum :: Text -> Char
  693 -- entered 400 timesminimum t = S.minimum (stream t)
  694 {-# INLINE minimum #-}
  695 
  696 -- -----------------------------------------------------------------------------
  697 -- * Building 'Text's
  698 
  699 -- | /O(n)/ 'scanl' is similar to 'foldl', but returns a list of
  700 -- successive reduced values from the left. Subject to fusion.
  701 --
  702 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
  703 --
  704 -- Note that
  705 --
  706 -- > last (scanl f z xs) == foldl f z xs.
  707 scanl :: (Char -> Char -> Char) -> Char -> Text -> Text
  708 -- entered 759 timesscanl f z t = unstream (S.scanl f z (stream t))
  709 {-# INLINE scanl #-}
  710 
  711 -- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting
  712 -- value argument.  Subject to fusion.
  713 --
  714 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
  715 scanl1 :: (Char -> Char -> Char) -> Text -> Text
  716 -- entered 400 timesscanl1 f t | null t    = empty
  717            | otherwise = scanl f (unsafeHead t) (unsafeTail t)
  718 {-# INLINE scanl1 #-}
  719 
  720 -- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'.
  721 --
  722 -- > scanr f v == reverse . scanl (flip f) v . reverse
  723 scanr :: (Char -> Char -> Char) -> Char -> Text -> Text
  724 -- entered 467 timesscanr f z = S.reverse . S.reverseScanr f z . reverseStream
  725 {-# INLINE scanr #-}
  726 
  727 -- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting
  728 -- value argument.  Subject to fusion.
  729 scanr1 :: (Char -> Char -> Char) -> Text -> Text
  730 -- entered 400 timesscanr1 f t | null t    = empty
  731            | otherwise = scanr f (last t) (init t)
  732 {-# INLINE scanr1 #-}
  733 
  734 -- | /O(n)/ Like a combination of 'map' and 'foldl'. Applies a
  735 -- function to each element of a 'Text', passing an accumulating
  736 -- parameter from left to right, and returns a final 'Text'.
  737 mapAccumL :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
  738 -- entered 7768 timesmapAccumL f s t = case uncons t of
  739                     Nothing -> (s, empty)
  740                     Just (x, xs) -> (s'', cons y ys)
  741                         where (s', y ) = f s x
  742                               (s'',ys) = mapAccumL f s' xs
  743 
  744 -- | The 'mapAccumR' function behaves like a combination of 'map' and
  745 -- 'foldr'; it applies a function to each element of a 'Text', passing
  746 -- an accumulating parameter from right to left, and returning a final
  747 -- value of this accumulator together with the new 'Text'.
  748 mapAccumR :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
  749 -- entered 7087 timesmapAccumR f s t = case uncons t of
  750                     Nothing -> (s, empty)
  751                     Just (x, xs) ->  (s'', cons y ys)
  752                         where (s'',y ) = f s' x
  753                               (s', ys) = mapAccumR f s xs
  754 
  755 -- -----------------------------------------------------------------------------
  756 -- ** Generating and unfolding 'Text's
  757 
  758 -- | /O(n*m)/ 'replicate' @n@ @t@ is a 'Text' consisting of the input
  759 -- @t@ repeated @n@ times.
  760 replicate :: Int -> Text -> Text
  761 -- entered 100 timesreplicate n t@(Text a o l)
  762     | n <= 0 || l <= 0 = empty
  763     | n == 1           = t
  764     | isSingleton t    = replicateChar n (unsafeHead t)
  765     | otherwise        = Text (A.run x) 0 len
  766   where
  767     len = l * n
  768     x = do
  769       arr <- A.unsafeNew len
  770       let loop !d !i | i >= n    = return arr
  771                      | otherwise = let m = d + l
  772                                    in A.copyI arr d a o m >> loop m (i+1)
  773       loop 0 0
  774 {-# INLINE [1] replicate #-}
  775 
  776 {-# RULES
  777 "TEXT replicate/singleton -> replicateChar" [~1] forall n c.
  778     replicate n (singleton c) = replicateChar n c
  779   #-}
  780 
  781 -- | /O(n)/ 'replicateChar' @n@ @c@ is a 'Text' of length @n@ with @c@ the
  782 -- value of every element. Subject to fusion.
  783 replicateChar :: Int -> Char -> Text
  784 -- entered 1148 timesreplicateChar n c = unstream (S.replicateCharI n c)
  785 {-# INLINE replicateChar #-}
  786 
  787 -- | /O(n)/, where @n@ is the length of the result. The 'unfoldr'
  788 -- function is analogous to the List 'L.unfoldr'. 'unfoldr' builds a
  789 -- 'Text' from a seed value. The function takes the element and
  790 -- returns 'Nothing' if it is done producing the 'Text', otherwise
  791 -- 'Just' @(a,b)@.  In this case, @a@ is the next 'Char' in the
  792 -- string, and @b@ is the seed value for further production. Subject
  793 -- to fusion.
  794 unfoldr     :: (a -> Maybe (Char,a)) -> a -> Text
  795 -- entered 100 timesunfoldr f s = unstream (S.unfoldr f s)
  796 {-# INLINE unfoldr #-}
  797 
  798 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Text' from a seed
  799 -- value. However, the length of the result should be limited by the
  800 -- first argument to 'unfoldrN'. This function is more efficient than
  801 -- 'unfoldr' when the maximum length of the result is known and
  802 -- correct, otherwise its performance is similar to 'unfoldr'. Subject
  803 -- to fusion.
  804 unfoldrN     :: Int -> (a -> Maybe (Char,a)) -> a -> Text
  805 -- entered 100 timesunfoldrN n f s = unstream (S.unfoldrN n f s)
  806 {-# INLINE unfoldrN #-}
  807 
  808 -- -----------------------------------------------------------------------------
  809 -- * Substrings
  810 
  811 -- | /O(n)/ 'take' @n@, applied to a 'Text', returns the prefix of the
  812 -- 'Text' of length @n@, or the 'Text' itself if @n@ is greater than
  813 -- the length of the Text. Subject to fusion.
  814 take :: Int -> Text -> Text
  815 -- entered 2578 timestake n t@(Text arr off len)
  816     | n <= 0    = empty
  817     | n >= len  = t
  818     | otherwise = Text arr off (loop 0 0)
  819   where
  820       loop !i !cnt
  821            | i >= len || cnt >= n = i
  822            | otherwise            = loop (i+d) (cnt+1)
  823            where d = iter_ t i
  824 {-# INLINE [1] take #-}
  825 
  826 {-# RULES
  827 "TEXT take -> fused" [~1] forall n t.
  828     take n t = unstream (S.take n (stream t))
  829 "TEXT take -> unfused" [1] forall n t.
  830     unstream (S.take n (stream t)) = take n t
  831   #-}
  832 
  833 -- | /O(n)/ 'drop' @n@, applied to a 'Text', returns the suffix of the
  834 -- 'Text' after the first @n@ characters, or the empty 'Text' if @n@
  835 -- is greater than the length of the 'Text'. Subject to fusion.
  836 drop :: Int -> Text -> Text
  837 -- entered 3094 timesdrop n t@(Text arr off len)
  838     | n <= 0    = t
  839     | n >= len  = empty
  840     | otherwise = loop 0 0
  841   where loop !i !cnt
  842             | i >= len || cnt >= n   = Text arr (off+i) (len-i)
  843             | otherwise              = loop (i+d) (cnt+1)
  844             where d = iter_ t i
  845 {-# INLINE [1] drop #-}
  846 
  847 {-# RULES
  848 "TEXT drop -> fused" [~1] forall n t.
  849     drop n t = unstream (S.drop n (stream t))
  850 "TEXT drop -> unfused" [1] forall n t.
  851     unstream (S.drop n (stream t)) = drop n t
  852   #-}
  853 
  854 -- | /O(n)/ 'takeWhile', applied to a predicate @p@ and a 'Text',
  855 -- returns the longest prefix (possibly empty) of elements that
  856 -- satisfy @p@.  Subject to fusion.
  857 takeWhile :: (Char -> Bool) -> Text -> Text
  858 -- entered 400 timestakeWhile p t@(Text arr off len) = loop 0
  859   where loop !i | i >= len    = t
  860                 | p c         = loop (i+d)
  861                 | otherwise   = textP arr off i
  862             where Iter c d    = iter t i
  863 {-# INLINE [1] takeWhile #-}
  864 
  865 {-# RULES
  866 "TEXT takeWhile -> fused" [~1] forall p t.
  867     takeWhile p t = unstream (S.takeWhile p (stream t))
  868 "TEXT takeWhile -> unfused" [1] forall p t.
  869     unstream (S.takeWhile p (stream t)) = takeWhile p t
  870   #-}
  871 
  872 -- | /O(n)/ 'dropWhile' @p@ @t@ returns the suffix remaining after
  873 -- 'takeWhile' @p@ @t@. Subject to fusion.
  874 dropWhile :: (Char -> Bool) -> Text -> Text
  875 -- entered 1200 timesdropWhile p t@(Text arr off len) = loop 0 0
  876   where loop !i !l | l >= len  = empty
  877                    | p c       = loop (i+d) (l+d)
  878                    | otherwise = Text arr (off+i) (len-l)
  879             where Iter c d     = iter t i
  880 {-# INLINE [1] dropWhile #-}
  881 
  882 {-# RULES
  883 "TEXT dropWhile -> fused" [~1] forall p t.
  884     dropWhile p t = unstream (S.dropWhile p (stream t))
  885 "TEXT dropWhile -> unfused" [1] forall p t.
  886     unstream (S.dropWhile p (stream t)) = dropWhile p t
  887   #-}
  888 
  889 -- | /O(n)/ 'dropWhileEnd' @p@ @t@ returns the prefix remaining after
  890 -- dropping characters that fail the predicate @p@ from the end of
  891 -- @t@.  Subject to fusion.
  892 -- Examples:
  893 --
  894 -- > dropWhileEnd (=='.') "foo..." == "foo"
  895 dropWhileEnd :: (Char -> Bool) -> Text -> Text
  896 -- entered 2284 timesdropWhileEnd p t@(Text arr off len) = loop (len-1) len
  897   where loop !i !l | l <= 0    = empty
  898                    | p c       = loop (i+d) (l+d)
  899                    | otherwise = Text arr off l
  900             where (c,d)        = reverseIter t i
  901 {-# INLINE [1] dropWhileEnd #-}
  902 
  903 {-# RULES
  904 "TEXT dropWhileEnd -> fused" [~1] forall p t.
  905     dropWhileEnd p t = S.reverse (S.dropWhile p (S.reverseStream t))
  906 "TEXT dropWhileEnd -> unfused" [1] forall p t.
  907     S.reverse (S.dropWhile p (S.reverseStream t)) = dropWhileEnd p t
  908   #-}
  909 
  910 -- | /O(n)/ 'dropAround' @p@ @t@ returns the substring remaining after
  911 -- dropping characters that fail the predicate @p@ from both the
  912 -- beginning and end of @t@.  Subject to fusion.
  913 dropAround :: (Char -> Bool) -> Text -> Text
  914 -- entered 102 timesdropAround p = dropWhile p . dropWhileEnd p
  915 {-# INLINE [1] dropAround #-}
  916 
  917 -- | /O(n)/ Remove leading white space from a string.  Equivalent to:
  918 --
  919 -- > dropWhile isSpace
  920 stripStart :: Text -> Text
  921 -- entered oncestripStart = dropWhile isSpace
  922 {-# INLINE [1] stripStart #-}
  923 
  924 -- | /O(n)/ Remove trailing white space from a string.  Equivalent to:
  925 --
  926 -- > dropWhileEnd isSpace
  927 stripEnd :: Text -> Text
  928 -- entered oncestripEnd = dropWhileEnd isSpace
  929 {-# INLINE [1] stripEnd #-}
  930 
  931 -- | /O(n)/ Remove leading and trailing white space from a string.
  932 -- Equivalent to:
  933 --
  934 -- > dropAround isSpace
  935 strip :: Text -> Text
  936 -- entered oncestrip = dropAround isSpace
  937 {-# INLINE [1] strip #-}
  938 
  939 -- | /O(n)/ 'splitAt' @n t@ returns a pair whose first element is a
  940 -- prefix of @t@ of length @n@, and whose second is the remainder of
  941 -- the string. It is equivalent to @('take' n t, 'drop' n t)@.
  942 splitAt :: Int -> Text -> (Text, Text)
  943 -- entered 14,515 timessplitAt n t@(Text arr off len)
  944     | n <= 0    = (empty, t)
  945     | n >= len  = (t, empty)
  946     | otherwise = (Text arr off k, Text arr (off+k) (len-k))
  947   where k = loop 0 0
  948         loop !i !cnt
  949             | i >= len || cnt >= n = i
  950             | otherwise            = loop (i+d) (cnt+1)
  951             where d                = iter_ t i
  952 {-# INLINE splitAt #-}
  953 
  954 -- | /O(n)/ 'spanBy', applied to a predicate @p@ and text @t@, returns
  955 -- a pair whose first element is the longest prefix (possibly empty)
  956 -- of @t@ of elements that satisfy @p@, and whose second is the
  957 -- remainder of the list.
  958 spanBy :: (Char -> Bool) -> Text -> (Text, Text)
  959 -- entered 3100 timesspanBy p t@(Text arr off len) = (textP arr off k, textP arr (off+k) (len-k))
  960   where k = loop 0
  961         loop !i | i >= len || not (p c) = i
  962                 | otherwise             = loop (i+d)
  963             where Iter c d              = iter t i
  964 {-# INLINE spanBy #-}
  965 
  966 -- | /O(n)/ 'breakBy' is like 'spanBy', but the prefix returned is
  967 -- over elements that fail the predicate @p@.
  968 breakBy :: (Char -> Bool) -> Text -> (Text, Text)
  969 -- entered 1984 timesbreakBy p = spanBy (not . p)
  970 {-# INLINE breakBy #-}
  971 
  972 -- | /O(n)/ Group characters in a string according to a predicate.
  973 groupBy :: (Char -> Char -> Bool) -> Text -> [Text]
  974 -- entered 800 timesgroupBy p = loop
  975   where
  976     loop t@(Text arr off len)
  977         | null t    = []
  978         | otherwise = text arr off n : loop (text arr (off+n) (len-n))
  979         where Iter c d = iter t 0
  980               n     = d + findAIndexOrEnd (not . p c) (Text arr (off+d) (len-d))
  981 
  982 -- | Returns the /array/ index (in units of 'Word16') at which a
  983 -- character may be found.  This is /not/ the same as the logical
  984 -- index returned by e.g. 'findIndex'.
  985 findAIndexOrEnd :: (Char -> Bool) -> Text -> Int
  986 -- entered 11,443 timesfindAIndexOrEnd q t@(Text _arr _off len) = go 0
  987     where go !i | i >= len || q c       = i
  988                 | otherwise             = go (i+d)
  989                 where Iter c d          = iter t i
  990     
  991 -- | /O(n)/ Group characters in a string by equality.
  992 group :: Text -> [Text]
  993 -- entered oncegroup = groupBy (==)
  994 
  995 -- | /O(n)/ Return all initial segments of the given 'Text', shortest
  996 -- first.
  997 inits :: Text -> [Text]
  998 -- entered 952 timesinits t@(Text arr off len) = loop 0
  999     where loop i | i >= len = [t]
 1000                  | otherwise = Text arr off i : loop (i + iter_ t i)
 1001 
 1002 -- | /O(n)/ Return all final segments of the given 'Text', longest
 1003 -- first.
 1004 tails :: Text -> [Text]
 1005 -- entered 400 timestails t | null t    = [empty]
 1006         | otherwise = t : tails (unsafeTail t)
 1007 
 1008 -- $split
 1009 --
 1010 -- Splitting functions in this library do not perform character-wise
 1011 -- copies to create substrings; they just construct new 'Text's that
 1012 -- are slices of the original.
 1013 
 1014 -- | /O(m+n)/ Break a 'Text' into pieces separated by the first
 1015 -- 'Text' argument, consuming the delimiter. An empty delimiter is
 1016 -- invalid, and will cause an error to be raised.
 1017 --
 1018 -- Examples:
 1019 --
 1020 -- > split "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
 1021 -- > split "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
 1022 -- > split "x"    "x"                == ["",""]
 1023 -- 
 1024 -- and
 1025 --
 1026 -- > intercalate s . split s         == id
 1027 -- > split (singleton c)             == splitBy (==c)
 1028 --
 1029 -- In (unlikely) bad cases, this function's time complexity degrades
 1030 -- towards /O(n*m)/.
 1031 split :: Text -> Text -> [Text]
 1032 -- entered 900 timessplit pat@(Text _ _ l) src@(Text arr off len)
 1033     | l <= 0          = emptyError "split"
 1034     | isSingleton pat = splitBy (== unsafeHead pat) src
 1035     | otherwise       = go 0 (indices pat src)
 1036   where
 1037     go !s (x:xs) =  textP arr (s+off) (x-s) : go (x+l) xs
 1038     go  s _      = [textP arr (s+off) (len-s)]
 1039 {-# INLINE [1] split #-}
 1040 
 1041 {-# RULES
 1042 "TEXT split/singleton -> splitBy/==" [~1] forall c t.
 1043     split (singleton c) t = splitBy (==c) t
 1044   #-}
 1045 
 1046 -- | /O(n)/ Splits a 'Text' into components delimited by separators,
 1047 -- where the predicate returns True for a separator element.  The
 1048 -- resulting components do not contain the separators.  Two adjacent
 1049 -- separators result in an empty component in the output.  eg.
 1050 --
 1051 -- > splitBy (=='a') "aabbaca" == ["","","bb","c",""]
 1052 -- > splitBy (=='a') ""        == [""]
 1053 splitBy :: (Char -> Bool) -> Text -> [Text]
 1054 -- entered 1488 timessplitBy _ t@(Text _off _arr 0) = [t]
 1055 splitBy p t = loop t
 1056     where loop s | null s'   = [l]
 1057                  | otherwise = l : loop (unsafeTail s')
 1058               where (l, s') = breakBy p s
 1059 {-# INLINE splitBy #-}
 1060 
 1061 -- | /O(n)/ Splits a 'Text' into components of length @k@.  The last
 1062 -- element may be shorter than the other chunks, depending on the
 1063 -- length of the input. Examples:
 1064 --
 1065 -- > chunksOf 3 "foobarbaz"   == ["foo","bar","baz"]
 1066 -- > chunksOf 4 "haskell.org" == ["hask","ell.","org"]
 1067 chunksOf :: Int -> Text -> [Text]
 1068 -- entered 300 timeschunksOf k = go
 1069   where
 1070     go t = case splitAt k t of
 1071              (a,b) | null a    -> []
 1072                    | otherwise -> a : go b
 1073 {-# INLINE chunksOf #-}
 1074 
 1075 -- ----------------------------------------------------------------------------
 1076 -- * Searching
 1077 
 1078 -------------------------------------------------------------------------------
 1079 -- ** Searching with a predicate
 1080 
 1081 -- | /O(n)/ The 'findBy' function takes a predicate and a 'Text', and
 1082 -- returns the first element in matching the predicate, or 'Nothing'
 1083 -- if there is no such element.
 1084 findBy :: (Char -> Bool) -> Text -> Maybe Char
 1085 -- entered 400 timesfindBy p t = S.findBy p (stream t)
 1086 {-# INLINE findBy #-}
 1087 
 1088 -- | /O(n)/ The 'partitionBy' function takes a predicate and a 'Text',
 1089 -- and returns the pair of 'Text's with elements which do and do not
 1090 -- satisfy the predicate, respectively; i.e.
 1091 --
 1092 -- > partitionBy p t == (filter p t, filter (not . p) t)
 1093 partitionBy :: (Char -> Bool) -> Text -> (Text, Text)
 1094 -- entered 400 timespartitionBy p t = (filter p t, filter (not . p) t)
 1095 {-# INLINE partitionBy #-}
 1096 
 1097 -- | /O(n)/ 'filter', applied to a predicate and a 'Text',
 1098 -- returns a 'Text' containing those characters that satisfy the
 1099 -- predicate.
 1100 filter :: (Char -> Bool) -> Text -> Text
 1101 -- entered 4315 timesfilter p t = unstream (S.filter p (stream t))
 1102 {-# INLINE filter #-}
 1103 
 1104 -- | /O(n+m)/ Find the first instance of @needle@ (which must be
 1105 -- non-'null') in @haystack@.  The first element of the returned tuple
 1106 -- is the prefix of @haystack@ before @needle@ is matched.  The second
 1107 -- is the remainder of @haystack@, starting with the match.
 1108 --
 1109 -- Examples:
 1110 --
 1111 -- > break "::" "a::b::c" ==> ("a", "::b::c")
 1112 -- > break "/" "foobar"   ==> ("foobar", "")
 1113 --
 1114 -- Laws:
 1115 --
 1116 -- > append prefix match == haystack
 1117 -- >   where (prefix, match) = break needle haystack
 1118 --
 1119 -- If you need to break a string by a substring repeatedly (e.g. you
 1120 -- want to break on every instance of a substring), use 'find'
 1121 -- instead, as it has lower startup overhead.
 1122 --
 1123 -- In (unlikely) bad cases, this function's time complexity degrades
 1124 -- towards /O(n*m)/.
 1125 break :: Text -> Text -> (Text, Text)
 1126 -- entered 300 timesbreak pat src@(Text arr off len)
 1127     | null pat  = emptyError "break"
 1128     | otherwise = case indices pat src of
 1129                     []    -> (src, empty)
 1130                     (x:_) -> (textP arr off x, textP arr (off+x) (len-x))
 1131 {-# INLINE break #-}
 1132 
 1133 -- | /O(n+m)/ Similar to 'break', but searches from the end of the string.
 1134 --
 1135 -- The first element of the returned tuple is the prefix of @haystack@
 1136 -- up to and including the last match of @needle@.  The second is the
 1137 -- remainder of @haystack@, following the match.
 1138 --
 1139 -- > breakEnd "::" "a::b::c" ==> ("a::b::", "c")
 1140 breakEnd :: Text -> Text -> (Text, Text)
 1141 -- entered 100 timesbreakEnd pat src = let (a,b) = break (reverse pat) (reverse src)
 1142                    in  (reverse b, reverse a)
 1143 {-# INLINE breakEnd #-}
 1144 
 1145 -- | /O(n+m)/ Find all non-overlapping instances of @needle@ in
 1146 -- @haystack@.  Each element of the returned list consists of a pair:
 1147 --
 1148 -- * The entire string prior to the /k/th match (i.e. the prefix)
 1149 --
 1150 -- * The /k/th match, followed by the remainder of the string
 1151 --
 1152 -- Examples:
 1153 --
 1154 -- > find "::" ""
 1155 -- > ==> []
 1156 -- > find "/" "a/b/c/"
 1157 -- > ==> [("a", "/b/c/"), ("a/b", "/c/"), ("a/b/c", "/")]
 1158 --
 1159 -- In (unlikely) bad cases, this function's time complexity degrades
 1160 -- towards /O(n*m)/.
 1161 --
 1162 -- The @needle@ parameter may not be empty.
 1163 find :: Text                    -- ^ @needle@ to search for
 1164      -> Text                    -- ^ @haystack@ in which to search
 1165      -> [(Text, Text)]
 1166 -- entered 300 timesfind pat src@(Text arr off slen)
 1167     | null pat  = emptyError "find"
 1168     | otherwise = L.map step (indices pat src)
 1169   where
 1170     step       x = (chunk 0 x, chunk x (slen-x))
 1171     chunk !n !l  = textP arr (n+off) l
 1172 {-# INLINE find #-}
 1173 
 1174 -------------------------------------------------------------------------------
 1175 -- ** Indexing 'Text's
 1176 
 1177 -- $index
 1178 --
 1179 -- If you think of a 'Text' value as an array of 'Char' values (which
 1180 -- it is not), you run the risk of writing inefficient code.
 1181 --
 1182 -- An idiom that is common in some languages is to find the numeric
 1183 -- offset of a character or substring, then use that number to split
 1184 -- or trim the searched string.  With a 'Text' value, this approach
 1185 -- would require two /O(n)/ operations: one to perform the search, and
 1186 -- one to operate from wherever the search ended.
 1187 --
 1188 -- For example, suppose you have a string that you want to split on
 1189 -- the substring @\"::\"@, such as @\"foo::bar::quux\"@. Instead of
 1190 -- searching for the index of @\"::\"@ and taking the substrings
 1191 -- before and after that index, you would instead use @find \"::\"@.
 1192 
 1193 -- | /O(n)/ 'Text' index (subscript) operator, starting from 0.
 1194 index :: Text -> Int -> Char
 1195 -- entered 100 timesindex t n = S.index (stream t) n
 1196 {-# INLINE index #-}
 1197 
 1198 -- | /O(n)/ The 'findIndex' function takes a predicate and a 'Text'
 1199 -- and returns the index of the first element in the 'Text' satisfying
 1200 -- the predicate. Subject to fusion.
 1201 findIndex :: (Char -> Bool) -> Text -> Maybe Int
 1202 -- entered 12,673 timesfindIndex p t = S.findIndex p (stream t)
 1203 {-# INLINE findIndex #-}
 1204 
 1205 -- | /O(n+m)/ The 'count' function returns the number of times the
 1206 -- query string appears in the given 'Text'. An empty query string is
 1207 -- invalid, and will cause an error to be raised.
 1208 --
 1209 -- In (unlikely) bad cases, this function's time complexity degrades
 1210 -- towards /O(n*m)/.
 1211 count :: Text -> Text -> Int
 1212 -- entered 300 timescount pat src
 1213     | null pat        = emptyError "count"
 1214     | isSingleton pat = countChar (unsafeHead pat) src
 1215     | otherwise       = L.length (indices pat src)
 1216 {-# INLINE [1] count #-}
 1217 
 1218 {-# RULES
 1219 "TEXT count/singleton -> countChar" [~1] forall c t.
 1220     count (singleton c) t = countChar c t
 1221   #-}
 1222 
 1223 -- | /O(n)/ The 'countChar' function returns the number of times the
 1224 -- query element appears in the given 'Text'. Subject to fusion.
 1225 countChar :: Char -> Text -> Int
 1226 -- entered 107 timescountChar c t = S.countChar c (stream t)
 1227 {-# INLINE countChar #-}
 1228 
 1229 -------------------------------------------------------------------------------
 1230 -- * Zipping
 1231 
 1232 -- | /O(n)/ 'zip' takes two 'Text's and returns a list of
 1233 -- corresponding pairs of bytes. If one input 'Text' is short,
 1234 -- excess elements of the longer 'Text' are discarded. This is
 1235 -- equivalent to a pair of 'unpack' operations.
 1236 zip :: Text -> Text -> [(Char,Char)]
 1237 -- entered 400 timeszip a b = S.unstreamList $ S.zipWith (,) (stream a) (stream b)
 1238 {-# INLINE [0] zip #-}
 1239 
 1240 -- | /O(n)/ 'zipWith' generalises 'zip' by zipping with the function
 1241 -- given as the first argument, instead of a tupling function.
 1242 zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text
 1243 -- entered 400 timeszipWith f t1 t2 = unstream (S.zipWith f (stream t1) (stream t2))
 1244 {-# INLINE [0] zipWith #-}
 1245 
 1246 -- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's
 1247 -- representing white space.
 1248 words :: Text -> [Text]
 1249 -- entered 400 timeswords t@(Text arr off len) = loop 0 0
 1250   where
 1251     loop !start !n
 1252         | n >= len = if start == n
 1253                      then []
 1254                      else [Text arr (start+off) (n-start)]
 1255         | isSpace c =
 1256             if start == n
 1257             then loop (start+1) (start+1)
 1258             else Text arr (start+off) (n-start) : loop (n+d) (n+d)
 1259         | otherwise = loop start (n+d)
 1260         where Iter c d = iter t n
 1261 {-# INLINE words #-}
 1262 
 1263 -- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
 1264 -- newline 'Char's. The resulting strings do not contain newlines.
 1265 lines :: Text -> [Text]
 1266 -- entered 457 timeslines ps | null ps   = []
 1267          | otherwise = h : if null t
 1268                            then []
 1269                            else lines (unsafeTail t)
 1270     where (h,t) = spanBy (/= '\n') ps
 1271 {-# INLINE lines #-}
 1272 
 1273 {-
 1274 -- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line
 1275 -- boundaries.
 1276 --
 1277 -- A line boundary is considered to be either a line feed, a carriage
 1278 -- return immediately followed by a line feed, or a carriage return.
 1279 -- This accounts for both Unix and Windows line ending conventions,
 1280 -- and for the old convention used on Mac OS 9 and earlier.
 1281 lines' :: Text -> [Text]
 1282 lines' ps | null ps   = []
 1283           | otherwise = h : case uncons t of
 1284                               Nothing -> []
 1285                               Just (c,t')
 1286                                   | c == '\n' -> lines t'
 1287                                   | c == '\r' -> case uncons t' of
 1288                                                    Just ('\n',t'') -> lines t''
 1289                                                    _               -> lines t'
 1290     where (h,t)    = span notEOL ps
 1291           notEOL c = c /= '\n' && c /= '\r'
 1292 {-# INLINE lines' #-}
 1293 -}
 1294 
 1295 -- | /O(n)/ Joins lines, after appending a terminating newline to
 1296 -- each.
 1297 unlines :: [Text] -> Text
 1298 -- entered 101 timesunlines = concat . L.map (`snoc` '\n')
 1299 {-# INLINE unlines #-}
 1300 
 1301 -- | /O(n)/ Joins words using single space characters.
 1302 unwords :: [Text] -> Text
 1303 -- entered 100 timesunwords = intercalate (singleton ' ')
 1304 {-# INLINE unwords #-}
 1305 
 1306 -- | /O(n)/ The 'isPrefixOf' function takes two 'Text's and returns
 1307 -- 'True' iff the first is a prefix of the second.  This function is
 1308 -- subject to fusion.
 1309 isPrefixOf :: Text -> Text -> Bool
 1310 -- entered 2575 timesisPrefixOf a@(Text _ _ alen) b@(Text _ _ blen) =
 1311     alen <= blen && S.isPrefixOf (stream a) (stream b)
 1312 {-# INLINE [1] isPrefixOf #-}
 1313 
 1314 {-# RULES
 1315 "TEXT isPrefixOf -> fused" [~1] forall s t.
 1316     isPrefixOf s t = S.isPrefixOf (stream s) (stream t)
 1317 "TEXT isPrefixOf -> unfused" [1] forall s t.
 1318     S.isPrefixOf (stream s) (stream t) = isPrefixOf s t
 1319   #-}
 1320 
 1321 -- | /O(n)/ The 'isSuffixOf' function takes two 'Text's and returns
 1322 -- 'True' iff the first is a suffix of the second.
 1323 isSuffixOf :: Text -> Text -> Bool
 1324 -- entered 900 timesisSuffixOf a@(Text _aarr _aoff alen) b@(Text barr boff blen) =
 1325     d >= 0 && a == b'
 1326   where d              = blen - alen
 1327         b' | d == 0    = b
 1328            | otherwise = Text barr (boff+d) alen
 1329 {-# INLINE isSuffixOf #-}
 1330 
 1331 -- | /O(n+m)/ The 'isInfixOf' function takes two 'Text's and returns
 1332 -- 'True' iff the first is contained, wholly and intact, anywhere
 1333 -- within the second.
 1334 --
 1335 -- In (unlikely) bad cases, this function's time complexity degrades
 1336 -- towards /O(n*m)/.
 1337 isInfixOf :: Text -> Text -> Bool
 1338 -- entered 400 timesisInfixOf needle haystack
 1339     | null needle        = True
 1340     | isSingleton needle = S.elem (unsafeHead needle) . S.stream $ haystack
 1341     | otherwise          = not . L.null . indices needle $ haystack
 1342 {-# INLINE [1] isInfixOf #-}
 1343 
 1344 {-# RULES
 1345 "TEXT isInfixOf/singleton -> S.elem/S.stream" [~1] forall n h.
 1346     isInfixOf (singleton n) h = S.elem n (S.stream h)
 1347   #-}
 1348 
 1349 -------------------------------------------------------------------------------
 1350 -- * View patterns
 1351 
 1352 -- | /O(n)/ Returns the suffix of the second string if its prefix
 1353 -- matches the first.
 1354 --
 1355 -- Examples:
 1356 --
 1357 -- > prefixed "foo" "foobar" == Just "bar"
 1358 -- > prefixed "foo" "quux"   == Nothing
 1359 --
 1360 -- This is particularly useful with the @ViewPatterns@ extension to
 1361 -- GHC, as follows:
 1362 --
 1363 -- > {-# LANGUAGE ViewPatterns #-}
 1364 -- > import Data.Text as T
 1365 -- >
 1366 -- > fnordLength :: Text -> Int
 1367 -- > fnordLength (prefixed "fnord" -> Just suf) = T.length suf
 1368 -- > fnordLength _                              = -1
 1369 prefixed :: Text -> Text -> Maybe Text
 1370 -- entered 400 timesprefixed p@(Text _arr _off plen) t@(Text arr off len)
 1371     | p `isPrefixOf` t = Just $! textP arr (off+plen) (len-plen)
 1372     | otherwise        = Nothing
 1373 
 1374 -- | /O(n)/ Returns the prefix of the second string if its suffix
 1375 -- matches the first.
 1376 --
 1377 -- Examples:
 1378 --
 1379 -- > suffixed "bar" "foobar" == Just "foo"
 1380 -- > suffixed "foo" "quux"   == Nothing
 1381 --
 1382 -- This is particularly useful with the @ViewPatterns@ extension to
 1383 -- GHC, as follows:
 1384 --
 1385 -- > {-# LANGUAGE ViewPatterns #-}
 1386 -- > import Data.Text as T
 1387 -- >
 1388 -- > quuxLength :: Text -> Int
 1389 -- > quuxLength (suffixed "quux" -> Just pre) = T.length pre
 1390 -- > quuxLength _                             = -1
 1391 suffixed :: Text -> Text -> Maybe Text
 1392 -- entered 400 timessuffixed p@(Text _arr _off plen) t@(Text arr off len)
 1393     | p `isSuffixOf` t = Just $! textP arr off (len-plen)
 1394     | otherwise        = Nothing
 1395 
 1396 emptyError :: String -> a
 1397 -- entered 63 timesemptyError fun = P.error ("Data.Text." ++ fun ++ ": empty input")