1 {-# LANGUAGE BangPatterns, MagicHash #-} 2 3 -- | 4 -- Module : Data.Text.Fusion 5 -- Copyright : (c) Tom Harper 2008-2009, 6 -- (c) Bryan O'Sullivan 2009-2010, 7 -- (c) Duncan Coutts 2009 8 -- 9 -- License : BSD-style 10 -- Maintainer : bos@serpentine.com, rtomharper@googlemail.com, 11 -- duncan@haskell.org 12 -- Stability : experimental 13 -- Portability : GHC 14 -- 15 -- Text manipulation functions represented as fusible operations over 16 -- streams. 17 module Data.Text.Fusion 18 ( 19 -- * Types 20 Stream(..) 21 , Step(..) 22 23 -- * Creation and elimination 24 , stream 25 , unstream 26 , reverseStream 27 28 , length 29 30 -- * Transformations 31 , reverse 32 33 -- * Construction 34 -- ** Scans 35 , reverseScanr 36 37 -- ** Generation and unfolding 38 , unfoldrN 39 40 -- * Indexing 41 , index 42 , findIndex 43 , countChar 44 ) where 45 46 import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int, 47 Num(..), Ord(..), ($), (&&), 48 fromIntegral, otherwise) 49 import Data.Bits ((.&.)) 50 import Data.Text.Internal (Text(..)) 51 import Data.Text.UnsafeChar (ord, unsafeChr, unsafeWrite) 52 import Data.Text.UnsafeShift (shiftL, shiftR) 53 import qualified Data.Text.Array as A 54 import qualified Data.Text.Fusion.Common as S 55 import Data.Text.Fusion.Internal 56 import Data.Text.Fusion.Size 57 import qualified Data.Text.Internal as I 58 import qualified Data.Text.Encoding.Utf16 as U16 59 import qualified Prelude as P 60 61 default(Int) 62 63 -- | /O(n)/ Convert a 'Text' into a 'Stream Char'. 64 stream :: Text -> Stream Char 65 -- entered 259,852 timesstream (Text arr off len) = Stream next off (maxSize len) 66 where 67 !end = off+len 68 next !i 69 | i >= end = Done 70 | n >= 0xD800 && n <= 0xDBFF = Yield (U16.chr2 n n2) (i + 2) 71 | otherwise = Yield (unsafeChr n) (i + 1) 72 where 73 n = A.unsafeIndex arr i 74 n2 = A.unsafeIndex arr (i + 1) 75 {-# INLINE [0] stream #-} 76 77 -- | /O(n)/ Convert a 'Text' into a 'Stream Char', but iterate 78 -- backwards. 79 reverseStream :: Text -> Stream Char 80 -- entered 867 timesreverseStream (Text arr off len) = Stream next (off+len-1) (maxSize len) 81 where 82 {-# INLINE next #-} 83 next !i 84 | i < off = Done 85 | n >= 0xDC00 && n <= 0xDFFF = Yield (U16.chr2 n2 n) (i - 2) 86 | otherwise = Yield (unsafeChr n) (i - 1) 87 where 88 n = A.unsafeIndex arr i 89 n2 = A.unsafeIndex arr (i - 1) 90 {-# INLINE [0] reverseStream #-} 91 92 -- | /O(n)/ Convert a 'Stream Char' into a 'Text'. 93 unstream :: Stream Char -> Text 94 -- entered 138,985 timesunstream (Stream next0 s0 len) = I.textP (P.fst a) 0 (P.snd a) 95 where 96 a = A.run2 (A.unsafeNew mlen >>= \arr -> outer arr mlen s0 0) 97 where mlen = upperBound 4 len 98 outer arr top = loop 99 where 100 loop !s !i = 101 case next0 s of 102 Done -> return (arr, i) 103 Skip s' -> loop s' i 104 Yield x s' 105 | j >= top -> {-# SCC "unstream/resize" #-} do 106 let top' = (top + 1) `shiftL` 1 107 arr' <- A.unsafeNew top' 108 A.copyM arr' 0 arr 0 top 109 outer arr' top' s i 110 | otherwise -> do d <- unsafeWrite arr i x 111 loop s' (i+d) 112 where j | ord x < 0x10000 = i 113 | otherwise = i + 1 114 {-# INLINE [0] unstream #-} 115 {-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-} 116 117 118 -- ---------------------------------------------------------------------------- 119 -- * Basic stream functions 120 121 length :: Stream Char -> Int 122 -- entered 37,336 timeslength = S.lengthI 123 {-# INLINE[0] length #-} 124 125 -- | /O(n)/ Reverse the characters of a string. 126 reverse :: Stream Char -> Text 127 -- entered 5652 timesreverse (Stream next s len0) 128 | isEmpty len0 = I.empty 129 | otherwise = I.textP arr off' len' 130 where 131 len0' = upperBound 4 (larger len0 4) 132 (arr, (off', len')) = A.run2 (A.unsafeNew len0' >>= loop s (len0'-1) len0') 133 loop !s0 !i !len marr = 134 case next s0 of 135 Done -> return (marr, (j, len-j)) 136 where j = i + 1 137 Skip s1 -> loop s1 i len marr 138 Yield x s1 | i < least -> {-# SCC "reverse/resize" #-} do 139 let newLen = len `shiftL` 1 140 marr' <- A.unsafeNew newLen 141 A.copyM marr' (newLen-len) marr 0 len 142 write s1 (len+i) newLen marr' 143 | otherwise -> write s1 i len marr 144 where n = ord x 145 least | n < 0x10000 = 0 146 | otherwise = 1 147 m = n - 0x10000 148 lo = fromIntegral $ (m `shiftR` 10) + 0xD800 149 hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00 150 write t j l mar 151 | n < 0x10000 = do 152 A.unsafeWrite mar j (fromIntegral n) 153 loop t (j-1) l mar 154 | otherwise = do 155 A.unsafeWrite mar (j-1) lo 156 A.unsafeWrite mar j hi 157 loop t (j-2) l mar 158 {-# INLINE [0] reverse #-} 159 160 -- | /O(n)/ Perform the equivalent of 'scanr' over a list, only with 161 -- the input and result reversed. 162 reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char 163 -- entered 767 timesreverseScanr f z0 (Stream next0 s0 len) = Stream next (S1 :*: z0 :*: s0) (len+1) -- HINT maybe too low 164 where 165 {-# INLINE next #-} 166 next (S1 :*: z :*: s) = Yield z (S2 :*: z :*: s) 167 next (S2 :*: z :*: s) = case next0 s of 168 Yield x s' -> let !x' = f x z 169 in Yield x' (S2 :*: x' :*: s') 170 Skip s' -> Skip (S2 :*: z :*: s') 171 Done -> Done 172 {-# INLINE reverseScanr #-} 173 174 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed 175 -- value. However, the length of the result is limited by the 176 -- first argument to 'unfoldrN'. This function is more efficient than 177 -- 'unfoldr' when the length of the result is known. 178 unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Stream Char 179 -- entered 100 timesunfoldrN n = S.unfoldrNI n 180 {-# INLINE [0] unfoldrN #-} 181 182 ------------------------------------------------------------------------------- 183 -- ** Indexing streams 184 185 -- | /O(n)/ stream index (subscript) operator, starting from 0. 186 index :: Stream Char -> Int -> Char 187 -- entered 200 timesindex = S.indexI 188 {-# INLINE [0] index #-} 189 190 -- | The 'findIndex' function takes a predicate and a stream and 191 -- returns the index of the first element in the stream 192 -- satisfying the predicate. 193 findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int 194 -- entered 12,505 timesfindIndex = S.findIndexI 195 {-# INLINE [0] findIndex #-} 196 197 -- | /O(n)/ The 'count' function returns the number of times the query 198 -- element appears in the given stream. 199 countChar :: Char -> Stream Char -> Int 200 -- entered 107 timescountChar = S.countCharI 201 {-# INLINE [0] countChar #-}