1 {-# LANGUAGE BangPatterns, CPP, Rank2Types #-} 2 3 ----------------------------------------------------------------------------- 4 -- | 5 -- Module : Data.Text.Lazy.Builder 6 -- Copyright : (c) 2010 Johan Tibell 7 -- License : BSD3-style (see LICENSE) 8 -- 9 -- Maintainer : Johan Tibell <johan.tibell@gmail.com> 10 -- Stability : experimental 11 -- Portability : portable to Hugs and GHC 12 -- 13 -- Efficient construction of lazy texts. 14 -- 15 ----------------------------------------------------------------------------- 16 17 module Data.Text.Lazy.Builder 18 ( -- * The Builder type 19 Builder 20 , toLazyText 21 , toLazyTextWith 22 23 -- * Constructing Builders 24 , singleton 25 , fromText 26 , fromLazyText 27 28 -- * Flushing the buffer state 29 , flush 30 ) where 31 32 import Control.Monad.ST (ST, runST) 33 import Data.Bits ((.&.)) 34 import Data.Monoid (Monoid(..)) 35 import Data.Text.Internal (Text(..)) 36 import Data.Text.Lazy.Internal (smallChunkSize) 37 import Data.Text.Unsafe (inlineInterleaveST) 38 import Data.Text.UnsafeChar (ord, unsafeWrite) 39 import Data.Text.UnsafeShift (shiftR) 40 import Prelude hiding (map, putChar) 41 42 import qualified Data.String as String 43 import qualified Data.Text as S 44 import qualified Data.Text.Array as A 45 import qualified Data.Text.Lazy as L 46 47 ------------------------------------------------------------------------ 48 49 -- | A 'Builder' is an efficient way to build lazy 'L.Text's. There 50 -- are several functions for constructing 'Builder's, but only one to 51 -- inspect them: to extract any data, you have to turn them into lazy 52 -- 'L.Text's using 'toLazyText'. 53 -- 54 -- Internally, a 'Builder' constructs a lazy 'L.Text' by filling byte 55 -- arrays piece by piece. As each buffer is filled, it is \'popped\' 56 -- off, to become a new chunk of the resulting lazy 'L.Text'. All 57 -- this is hidden from the user of the 'Builder'. 58 newtype Builder = Builder { 59 -- Invariant (from Data.Text.Lazy): 60 -- The lists include no null Texts. 61 runBuilder :: forall s. (Buffer s -> ST s [S.Text]) 62 -> Buffer s 63 -> ST s [S.Text] 64 } 65 66 instance Monoid Builder where 67 -- entered oncemempty = empty 68 {-# INLINE mempty #-} 69 -- entered 650 timesmappend = append 70 {-# INLINE mappend #-} 71 72 instance String.IsString Builder where 73 -- never enteredfromString = fromString 74 {-# INLINE fromString #-} 75 76 instance Show Builder where 77 -- never enteredshow = L.unpack . toLazyText 78 79 ------------------------------------------------------------------------ 80 81 -- | /O(1)./ The empty Builder, satisfying 82 -- 83 -- * @'toLazyText' 'empty' = 'L.empty'@ 84 -- 85 empty :: Builder 86 -- entered onceempty = Builder id 87 {-# INLINE empty #-} 88 89 -- | /O(1)./ A Builder taking a single character, satisfying 90 -- 91 -- * @'toLazyText' ('singleton' c) = 'L.singleton' c@ 92 -- 93 singleton :: Char -> Builder 94 -- entered 6924 timessingleton c = putChar c 95 {-# INLINE singleton #-} 96 97 ------------------------------------------------------------------------ 98 99 -- | /O(1)./ The concatenation of two Builders, an associative 100 -- operation with identity 'empty', satisfying 101 -- 102 -- * @'toLazyText' ('append' x y) = 'L.append' ('toLazyText' x) ('toLazyText' y)@ 103 -- 104 append :: Builder -> Builder -> Builder 105 -- entered 1350 timesappend (Builder f) (Builder g) = Builder (f . g) 106 {-# INLINE [0] append #-} 107 108 -- TODO: Experiment to find the right threshold. 109 copyLimit :: Int 110 -- entered 2614 timescopyLimit = 128 111 112 -- This function attempts to merge small Texts instead of treating the 113 -- text as its own chunk. We may not always want this. 114 115 -- | /O(1)./ A Builder taking a 'S.Text', satisfying 116 -- 117 -- * @'toLazyText' ('fromText' t) = 'L.fromChunks' [t]@ 118 -- 119 fromText :: S.Text -> Builder 120 -- entered 2673 timesfromText t@(Text arr off l) 121 | S.null t = empty 122 | l <= copyLimit = writeN l $ \marr o -> A.copyI marr o arr off (l+o) 123 | otherwise = flush `append` mapBuilder (t :) 124 {-# INLINE [1] fromText #-} 125 126 {-# RULES 127 "fromText/pack" forall s . 128 fromText (S.pack s) = fromString s 129 #-} 130 131 -- | /O(1)./ A Builder taking a 'String', satisfying 132 -- 133 -- * @'toLazyText' ('fromString' s) = 'L.fromChunks' [S.pack s]@ 134 -- 135 fromString :: String -> Builder 136 -- never enteredfromString str = Builder $ \k (Buffer p0 o0 u0 l0) -> 137 let loop !marr !o !u !l [] = k (Buffer marr o u l) 138 loop marr o u l s@(c:cs) 139 | l <= 1 = do 140 arr <- A.unsafeFreeze marr 141 let !t = Text arr o u 142 marr' <- A.unsafeNew chunkSize 143 ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s) 144 return $ t : ts 145 | otherwise = do 146 n <- unsafeWrite marr (o+u) c 147 loop marr o (u+n) (l-n) cs 148 in loop p0 o0 u0 l0 str 149 where 150 chunkSize = smallChunkSize 151 {-# INLINE fromString #-} 152 153 -- | /O(1)./ A Builder taking a lazy 'L.Text', satisfying 154 -- 155 -- * @'toLazyText' ('fromLazyText' t) = t@ 156 -- 157 fromLazyText :: L.Text -> Builder 158 -- never enteredfromLazyText ts = flush `append` mapBuilder (L.toChunks ts ++) 159 {-# INLINE fromLazyText #-} 160 161 ------------------------------------------------------------------------ 162 163 -- Our internal buffer type 164 data Buffer s = Buffer {-# UNPACK #-} !(A.MArray s) 165 {-# UNPACK #-} !Int -- offset 166 {-# UNPACK #-} !Int -- used units 167 {-# UNPACK #-} !Int -- length left 168 169 ------------------------------------------------------------------------ 170 171 -- | /O(n)./ Extract a lazy 'L.Text' from a 'Builder' with a default 172 -- buffer size. The construction work takes place if and when the 173 -- relevant part of the lazy 'L.Text' is demanded. 174 toLazyText :: Builder -> L.Text 175 -- entered 600 timestoLazyText = toLazyTextWith smallChunkSize 176 177 -- | /O(n)./ Extract a lazy 'L.Text' from a 'Builder', using the given 178 -- size for the initial buffer. The construction work takes place if 179 -- and when the relevant part of the lazy 'L.Text' is demanded. 180 -- 181 -- If the initial buffer is too small to hold all data, subsequent 182 -- buffers will be the default buffer size. 183 toLazyTextWith :: Int -> Builder -> L.Text 184 -- entered 700 timestoLazyTextWith chunkSize m = L.fromChunks . runST $ 185 newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return [])) 186 187 -- | /O(1)./ Pop the 'S.Text' we have constructed so far, if any, 188 -- yielding a new chunk in the result lazy 'L.Text'. 189 flush :: Builder 190 -- entered onceflush = Builder $ \ k buf@(Buffer p o u l) -> 191 if u == 0 192 then k buf 193 else do arr <- A.unsafeFreeze p 194 let !b = Buffer p (o+u) 0 l 195 !t = Text arr o u 196 ts <- inlineInterleaveST (k b) 197 return $! t : ts 198 199 ------------------------------------------------------------------------ 200 201 -- | Sequence an ST operation on the buffer 202 withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder 203 -- entered 10,373 timeswithBuffer f = Builder $ \k buf -> f buf >>= k 204 {-# INLINE withBuffer #-} 205 206 -- | Get the size of the buffer 207 withSize :: (Int -> Builder) -> Builder 208 -- entered 9538 timeswithSize f = Builder $ \ k buf@(Buffer _ _ _ l) -> 209 runBuilder (f l) k buf 210 {-# INLINE withSize #-} 211 212 -- | Map the resulting list of texts. 213 mapBuilder :: ([S.Text] -> [S.Text]) -> Builder 214 -- never enteredmapBuilder f = Builder (fmap f .) 215 216 ------------------------------------------------------------------------ 217 218 putChar :: Char -> Builder 219 -- entered 6924 timesputChar c 220 | n < 0x10000 = writeN 1 $ \marr o -> A.unsafeWrite marr o (fromIntegral n) 221 | otherwise = writeN 2 $ \marr o -> do 222 A.unsafeWrite marr o lo 223 A.unsafeWrite marr (o+1) hi 224 where n = ord c 225 m = n - 0x10000 226 lo = fromIntegral $ (m `shiftR` 10) + 0xD800 227 hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00 228 {-# INLINE putChar #-} 229 230 ------------------------------------------------------------------------ 231 232 -- | Ensure that there are at least @n@ many elements available. 233 ensureFree :: Int -> Builder 234 -- entered 9538 timesensureFree !n = withSize $ \ l -> 235 if n <= l 236 then empty 237 else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize))) 238 {-# INLINE [0] ensureFree #-} 239 240 -- | Ensure that @n@ many elements are available, and then use @f@ to 241 -- write some elements into the memory. 242 writeN :: Int -> (forall s. A.MArray s -> Int -> ST s ()) -> Builder 243 -- entered 9538 timeswriteN n f = ensureFree n `append'` withBuffer (writeNBuffer n f) 244 {-# INLINE [0] writeN #-} 245 246 writeNBuffer :: Int -> (A.MArray s -> Int -> ST s ()) -> (Buffer s) 247 -> ST s (Buffer s) 248 -- entered 9825 timeswriteNBuffer n f (Buffer p o u l) = do 249 f p (o+u) 250 return $! Buffer p o (u+n) (l-n) 251 {-# INLINE writeNBuffer #-} 252 253 newBuffer :: Int -> ST s (Buffer s) 254 -- entered 1551 timesnewBuffer size = do 255 arr <- A.unsafeNew size 256 return $! Buffer arr 0 0 size 257 {-# INLINE newBuffer #-} 258 259 ------------------------------------------------------------------------ 260 -- Some nice rules for Builder 261 262 -- This function makes GHC understand that 'writeN' and 'ensureFree' 263 -- are *not* recursive in the precense of the rewrite rules below. 264 -- This is not needed with GHC 6.14+. 265 append' :: Builder -> Builder -> Builder 266 -- entered 9538 timesappend' (Builder f) (Builder g) = Builder (f . g) 267 {-# INLINE append' #-} 268 269 {-# RULES 270 271 "append/writeN" forall a b (f::forall s. A.MArray s -> Int -> ST s ()) 272 (g::forall s. A.MArray s -> Int -> ST s ()) ws. 273 append (writeN a f) (append (writeN b g) ws) = 274 append (writeN (a+b) (\marr o -> f marr o >> g marr (o+a))) ws 275 276 "writeN/writeN" forall a b (f::forall s. A.MArray s -> Int -> ST s ()) 277 (g::forall s. A.MArray s -> Int -> ST s ()). 278 append (writeN a f) (writeN b g) = 279 writeN (a+b) (\marr o -> f marr o >> g marr (o+a)) 280 281 "ensureFree/ensureFree" forall a b . 282 append (ensureFree a) (ensureFree b) = ensureFree (max a b) 283 284 "flush/flush" 285 append flush flush = flush 286 287 #-}