1 {-# LANGUAGE BangPatterns, MagicHash #-}
    2 
    3 -- |
    4 -- Module      : Data.Text.Fusion
    5 -- Copyright   : (c) Tom Harper 2008-2009,
    6 --               (c) Bryan O'Sullivan 2009-2010,
    7 --               (c) Duncan Coutts 2009
    8 --
    9 -- License     : BSD-style
   10 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
   11 --               duncan@haskell.org
   12 -- Stability   : experimental
   13 -- Portability : GHC
   14 --
   15 -- Text manipulation functions represented as fusible operations over
   16 -- streams.
   17 module Data.Text.Fusion
   18     (
   19     -- * Types
   20       Stream(..)
   21     , Step(..)
   22 
   23     -- * Creation and elimination
   24     , stream
   25     , unstream
   26     , reverseStream
   27 
   28     , length
   29 
   30     -- * Transformations
   31     , reverse
   32 
   33     -- * Construction
   34     -- ** Scans
   35     , reverseScanr
   36 
   37     -- ** Generation and unfolding
   38     , unfoldrN
   39 
   40     -- * Indexing
   41     , index
   42     , findIndex
   43     , countChar
   44     ) where
   45 
   46 import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
   47                 Num(..), Ord(..), ($), (&&),
   48                 fromIntegral, otherwise)
   49 import Data.Bits ((.&.))
   50 import Data.Text.Internal (Text(..))
   51 import Data.Text.UnsafeChar (ord, unsafeChr, unsafeWrite)
   52 import Data.Text.UnsafeShift (shiftL, shiftR)
   53 import qualified Data.Text.Array as A
   54 import qualified Data.Text.Fusion.Common as S
   55 import Data.Text.Fusion.Internal
   56 import Data.Text.Fusion.Size
   57 import qualified Data.Text.Internal as I
   58 import qualified Data.Text.Encoding.Utf16 as U16
   59 import qualified Prelude as P
   60 
   61 default(Int)
   62 
   63 -- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
   64 stream :: Text -> Stream Char
   65 -- entered 259,852 timesstream (Text arr off len) = Stream next off (maxSize len)
   66     where
   67       !end = off+len
   68       next !i
   69           | i >= end                   = Done
   70           | n >= 0xD800 && n <= 0xDBFF = Yield (U16.chr2 n n2) (i + 2)
   71           | otherwise                  = Yield (unsafeChr n) (i + 1)
   72           where
   73             n  = A.unsafeIndex arr i
   74             n2 = A.unsafeIndex arr (i + 1)
   75 {-# INLINE [0] stream #-}
   76 
   77 -- | /O(n)/ Convert a 'Text' into a 'Stream Char', but iterate
   78 -- backwards.
   79 reverseStream :: Text -> Stream Char
   80 -- entered 867 timesreverseStream (Text arr off len) = Stream next (off+len-1) (maxSize len)
   81     where
   82       {-# INLINE next #-}
   83       next !i
   84           | i < off                    = Done
   85           | n >= 0xDC00 && n <= 0xDFFF = Yield (U16.chr2 n2 n) (i - 2)
   86           | otherwise                  = Yield (unsafeChr n) (i - 1)
   87           where
   88             n  = A.unsafeIndex arr i
   89             n2 = A.unsafeIndex arr (i - 1)
   90 {-# INLINE [0] reverseStream #-}
   91 
   92 -- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
   93 unstream :: Stream Char -> Text
   94 -- entered 138,985 timesunstream (Stream next0 s0 len) = I.textP (P.fst a) 0 (P.snd a)
   95   where
   96     a = A.run2 (A.unsafeNew mlen >>= \arr -> outer arr mlen s0 0)
   97       where mlen = upperBound 4 len
   98     outer arr top = loop
   99       where
  100         loop !s !i =
  101             case next0 s of
  102               Done          -> return (arr, i)
  103               Skip s'       -> loop s' i
  104               Yield x s'
  105                 | j >= top  -> {-# SCC "unstream/resize" #-} do
  106                                let top' = (top + 1) `shiftL` 1
  107                                arr' <- A.unsafeNew top'
  108                                A.copyM arr' 0 arr 0 top
  109                                outer arr' top' s i
  110                 | otherwise -> do d <- unsafeWrite arr i x
  111                                   loop s' (i+d)
  112                 where j | ord x < 0x10000 = i
  113                         | otherwise       = i + 1
  114 {-# INLINE [0] unstream #-}
  115 {-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
  116 
  117 
  118 -- ----------------------------------------------------------------------------
  119 -- * Basic stream functions
  120 
  121 length :: Stream Char -> Int
  122 -- entered 37,336 timeslength = S.lengthI
  123 {-# INLINE[0] length #-}
  124 
  125 -- | /O(n)/ Reverse the characters of a string.
  126 reverse :: Stream Char -> Text
  127 -- entered 5652 timesreverse (Stream next s len0)
  128     | isEmpty len0 = I.empty
  129     | otherwise    = I.textP arr off' len'
  130   where
  131     len0' = upperBound 4 (larger len0 4)
  132     (arr, (off', len')) = A.run2 (A.unsafeNew len0' >>= loop s (len0'-1) len0')
  133     loop !s0 !i !len marr =
  134         case next s0 of
  135           Done -> return (marr, (j, len-j))
  136               where j = i + 1
  137           Skip s1    -> loop s1 i len marr
  138           Yield x s1 | i < least -> {-# SCC "reverse/resize" #-} do
  139                        let newLen = len `shiftL` 1
  140                        marr' <- A.unsafeNew newLen
  141                        A.copyM marr' (newLen-len) marr 0 len
  142                        write s1 (len+i) newLen marr'
  143                      | otherwise -> write s1 i len marr
  144             where n = ord x
  145                   least | n < 0x10000 = 0
  146                         | otherwise   = 1
  147                   m = n - 0x10000
  148                   lo = fromIntegral $ (m `shiftR` 10) + 0xD800
  149                   hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
  150                   write t j l mar
  151                       | n < 0x10000 = do
  152                           A.unsafeWrite mar j (fromIntegral n)
  153                           loop t (j-1) l mar
  154                       | otherwise = do
  155                           A.unsafeWrite mar (j-1) lo
  156                           A.unsafeWrite mar j hi
  157                           loop t (j-2) l mar
  158 {-# INLINE [0] reverse #-}
  159 
  160 -- | /O(n)/ Perform the equivalent of 'scanr' over a list, only with
  161 -- the input and result reversed.
  162 reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
  163 -- entered 767 timesreverseScanr f z0 (Stream next0 s0 len) = Stream next (S1 :*: z0 :*: s0) (len+1) -- HINT maybe too low
  164   where
  165     {-# INLINE next #-}
  166     next (S1 :*: z :*: s) = Yield z (S2 :*: z :*: s)
  167     next (S2 :*: z :*: s) = case next0 s of
  168                               Yield x s' -> let !x' = f x z
  169                                             in Yield x' (S2 :*: x' :*: s')
  170                               Skip s'    -> Skip (S2 :*: z :*: s')
  171                               Done       -> Done
  172 {-# INLINE reverseScanr #-}
  173 
  174 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed
  175 -- value. However, the length of the result is limited by the
  176 -- first argument to 'unfoldrN'. This function is more efficient than
  177 -- 'unfoldr' when the length of the result is known.
  178 unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Stream Char
  179 -- entered 100 timesunfoldrN n = S.unfoldrNI n
  180 {-# INLINE [0] unfoldrN #-}
  181 
  182 -------------------------------------------------------------------------------
  183 -- ** Indexing streams
  184 
  185 -- | /O(n)/ stream index (subscript) operator, starting from 0.
  186 index :: Stream Char -> Int -> Char
  187 -- entered 200 timesindex = S.indexI
  188 {-# INLINE [0] index #-}
  189 
  190 -- | The 'findIndex' function takes a predicate and a stream and
  191 -- returns the index of the first element in the stream
  192 -- satisfying the predicate.
  193 findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
  194 -- entered 12,505 timesfindIndex = S.findIndexI
  195 {-# INLINE [0] findIndex #-}
  196 
  197 -- | /O(n)/ The 'count' function returns the number of times the query
  198 -- element appears in the given stream.
  199 countChar :: Char -> Stream Char -> Int
  200 -- entered 107 timescountChar = S.countCharI
  201 {-# INLINE [0] countChar #-}