1 {-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
    2 
    3 -----------------------------------------------------------------------------
    4 -- |
    5 -- Module      : Data.Text.Lazy.Builder
    6 -- Copyright   : (c) 2010 Johan Tibell
    7 -- License     : BSD3-style (see LICENSE)
    8 -- 
    9 -- Maintainer  : Johan Tibell <johan.tibell@gmail.com>
   10 -- Stability   : experimental
   11 -- Portability : portable to Hugs and GHC
   12 --
   13 -- Efficient construction of lazy texts.
   14 --
   15 -----------------------------------------------------------------------------
   16 
   17 module Data.Text.Lazy.Builder
   18    ( -- * The Builder type
   19      Builder
   20    , toLazyText
   21    , toLazyTextWith
   22 
   23      -- * Constructing Builders
   24    , singleton
   25    , fromText
   26    , fromLazyText
   27 
   28      -- * Flushing the buffer state
   29    , flush
   30    ) where
   31 
   32 import Control.Monad.ST (ST, runST)
   33 import Data.Bits ((.&.))
   34 import Data.Monoid (Monoid(..))
   35 import Data.Text.Internal (Text(..))
   36 import Data.Text.Lazy.Internal (smallChunkSize)
   37 import Data.Text.Unsafe (inlineInterleaveST)
   38 import Data.Text.UnsafeChar (ord, unsafeWrite)
   39 import Data.Text.UnsafeShift (shiftR)
   40 import Prelude hiding (map, putChar)
   41 
   42 import qualified Data.String as String
   43 import qualified Data.Text as S
   44 import qualified Data.Text.Array as A
   45 import qualified Data.Text.Lazy as L
   46 
   47 ------------------------------------------------------------------------
   48 
   49 -- | A 'Builder' is an efficient way to build lazy 'L.Text's.  There
   50 -- are several functions for constructing 'Builder's, but only one to
   51 -- inspect them: to extract any data, you have to turn them into lazy
   52 -- 'L.Text's using 'toLazyText'.
   53 --
   54 -- Internally, a 'Builder' constructs a lazy 'L.Text' by filling byte
   55 -- arrays piece by piece.  As each buffer is filled, it is \'popped\'
   56 -- off, to become a new chunk of the resulting lazy 'L.Text'.  All
   57 -- this is hidden from the user of the 'Builder'.
   58 newtype Builder = Builder {
   59      -- Invariant (from Data.Text.Lazy):
   60      --      The lists include no null Texts.
   61      runBuilder :: forall s. (Buffer s -> ST s [S.Text])
   62                 -> Buffer s
   63                 -> ST s [S.Text]
   64    }
   65 
   66 instance Monoid Builder where
   67    -- entered oncemempty  = empty
   68    {-# INLINE mempty #-}
   69    -- entered 650 timesmappend = append
   70    {-# INLINE mappend #-}
   71 
   72 instance String.IsString Builder where
   73     -- never enteredfromString = fromString
   74     {-# INLINE fromString #-}
   75 
   76 instance Show Builder where
   77     -- never enteredshow = L.unpack . toLazyText
   78 
   79 ------------------------------------------------------------------------
   80 
   81 -- | /O(1)./ The empty Builder, satisfying
   82 --
   83 --  * @'toLazyText' 'empty' = 'L.empty'@
   84 --
   85 empty :: Builder
   86 -- entered onceempty = Builder id
   87 {-# INLINE empty #-}
   88 
   89 -- | /O(1)./ A Builder taking a single character, satisfying
   90 --
   91 --  * @'toLazyText' ('singleton' c) = 'L.singleton' c@
   92 --
   93 singleton :: Char -> Builder
   94 -- entered 6924 timessingleton c = putChar c
   95 {-# INLINE singleton #-}
   96 
   97 ------------------------------------------------------------------------
   98 
   99 -- | /O(1)./ The concatenation of two Builders, an associative
  100 -- operation with identity 'empty', satisfying
  101 --
  102 --  * @'toLazyText' ('append' x y) = 'L.append' ('toLazyText' x) ('toLazyText' y)@
  103 --
  104 append :: Builder -> Builder -> Builder
  105 -- entered 1350 timesappend (Builder f) (Builder g) = Builder (f . g)
  106 {-# INLINE [0] append #-}
  107 
  108 -- TODO: Experiment to find the right threshold.
  109 copyLimit :: Int
  110 -- entered 2614 timescopyLimit =  128                                 
  111 
  112 -- This function attempts to merge small Texts instead of treating the
  113 -- text as its own chunk.  We may not always want this.
  114 
  115 -- | /O(1)./ A Builder taking a 'S.Text', satisfying
  116 --
  117 --  * @'toLazyText' ('fromText' t) = 'L.fromChunks' [t]@
  118 --
  119 fromText :: S.Text -> Builder
  120 -- entered 2673 timesfromText t@(Text arr off l)
  121     | S.null t       = empty
  122     | l <= copyLimit = writeN l $ \marr o -> A.copyI marr o arr off (l+o)
  123     | otherwise      = flush `append` mapBuilder (t :)
  124 {-# INLINE [1] fromText #-}
  125 
  126 {-# RULES
  127 "fromText/pack" forall s .
  128         fromText (S.pack s) = fromString s
  129  #-}
  130 
  131 -- | /O(1)./ A Builder taking a 'String', satisfying
  132 --
  133 --  * @'toLazyText' ('fromString' s) = 'L.fromChunks' [S.pack s]@
  134 --
  135 fromString :: String -> Builder
  136 -- never enteredfromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
  137     let loop !marr !o !u !l [] = k (Buffer marr o u l)
  138         loop marr o u l s@(c:cs)
  139             | l <= 1 = do
  140                 arr <- A.unsafeFreeze marr
  141                 let !t = Text arr o u
  142                 marr' <- A.unsafeNew chunkSize
  143                 ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
  144                 return $ t : ts
  145             | otherwise = do
  146                 n <- unsafeWrite marr (o+u) c
  147                 loop marr o (u+n) (l-n) cs
  148     in loop p0 o0 u0 l0 str
  149   where
  150     chunkSize = smallChunkSize
  151 {-# INLINE fromString #-}
  152 
  153 -- | /O(1)./ A Builder taking a lazy 'L.Text', satisfying
  154 --
  155 --  * @'toLazyText' ('fromLazyText' t) = t@
  156 --
  157 fromLazyText :: L.Text -> Builder
  158 -- never enteredfromLazyText ts = flush `append` mapBuilder (L.toChunks ts ++)
  159 {-# INLINE fromLazyText #-}
  160 
  161 ------------------------------------------------------------------------
  162 
  163 -- Our internal buffer type
  164 data Buffer s = Buffer {-# UNPACK #-} !(A.MArray s)
  165                        {-# UNPACK #-} !Int  -- offset
  166                        {-# UNPACK #-} !Int  -- used units
  167                        {-# UNPACK #-} !Int  -- length left
  168 
  169 ------------------------------------------------------------------------
  170 
  171 -- | /O(n)./ Extract a lazy 'L.Text' from a 'Builder' with a default
  172 -- buffer size.  The construction work takes place if and when the
  173 -- relevant part of the lazy 'L.Text' is demanded.
  174 toLazyText :: Builder -> L.Text
  175 -- entered 600 timestoLazyText = toLazyTextWith smallChunkSize
  176 
  177 -- | /O(n)./ Extract a lazy 'L.Text' from a 'Builder', using the given
  178 -- size for the initial buffer.  The construction work takes place if
  179 -- and when the relevant part of the lazy 'L.Text' is demanded.
  180 --
  181 -- If the initial buffer is too small to hold all data, subsequent
  182 -- buffers will be the default buffer size.
  183 toLazyTextWith :: Int -> Builder -> L.Text
  184 -- entered 700 timestoLazyTextWith chunkSize m = L.fromChunks . runST $
  185   newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return []))
  186 
  187 -- | /O(1)./ Pop the 'S.Text' we have constructed so far, if any,
  188 -- yielding a new chunk in the result lazy 'L.Text'.
  189 flush :: Builder
  190 -- entered onceflush = Builder $ \ k buf@(Buffer p o u l) ->
  191     if u == 0
  192     then k buf
  193     else do arr <- A.unsafeFreeze p
  194             let !b = Buffer p (o+u) 0 l
  195                 !t = Text arr o u
  196             ts <- inlineInterleaveST (k b)
  197             return $! t : ts
  198 
  199 ------------------------------------------------------------------------
  200 
  201 -- | Sequence an ST operation on the buffer
  202 withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder
  203 -- entered 10,373 timeswithBuffer f = Builder $ \k buf -> f buf >>= k
  204 {-# INLINE withBuffer #-}
  205 
  206 -- | Get the size of the buffer
  207 withSize :: (Int -> Builder) -> Builder
  208 -- entered 9538 timeswithSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
  209     runBuilder (f l) k buf
  210 {-# INLINE withSize #-}
  211 
  212 -- | Map the resulting list of texts.
  213 mapBuilder :: ([S.Text] -> [S.Text]) -> Builder
  214 -- never enteredmapBuilder f = Builder (fmap f .)
  215 
  216 ------------------------------------------------------------------------
  217 
  218 putChar :: Char -> Builder
  219 -- entered 6924 timesputChar c
  220     | n < 0x10000 = writeN 1 $ \marr o -> A.unsafeWrite marr o (fromIntegral n)
  221     | otherwise   = writeN 2 $ \marr o -> do
  222           A.unsafeWrite marr o lo
  223           A.unsafeWrite marr (o+1) hi
  224   where n = ord c
  225         m = n - 0x10000
  226         lo = fromIntegral $ (m `shiftR` 10) + 0xD800
  227         hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
  228 {-# INLINE putChar #-}
  229 
  230 ------------------------------------------------------------------------
  231 
  232 -- | Ensure that there are at least @n@ many elements available.
  233 ensureFree :: Int -> Builder
  234 -- entered 9538 timesensureFree !n = withSize $ \ l ->
  235     if n <= l
  236     then empty
  237     else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize)))
  238 {-# INLINE [0] ensureFree #-}
  239 
  240 -- | Ensure that @n@ many elements are available, and then use @f@ to
  241 -- write some elements into the memory.
  242 writeN :: Int -> (forall s. A.MArray s -> Int -> ST s ()) -> Builder
  243 -- entered 9538 timeswriteN n f = ensureFree n `append'` withBuffer (writeNBuffer n f)
  244 {-# INLINE [0] writeN #-}
  245 
  246 writeNBuffer :: Int -> (A.MArray s -> Int -> ST s ()) -> (Buffer s)
  247              -> ST s (Buffer s)
  248 -- entered 9825 timeswriteNBuffer n f (Buffer p o u l) = do
  249     f p (o+u)
  250     return $! Buffer p o (u+n) (l-n)
  251 {-# INLINE writeNBuffer #-}
  252 
  253 newBuffer :: Int -> ST s (Buffer s)
  254 -- entered 1551 timesnewBuffer size = do
  255     arr <- A.unsafeNew size
  256     return $! Buffer arr 0 0 size
  257 {-# INLINE newBuffer #-}
  258 
  259 ------------------------------------------------------------------------
  260 -- Some nice rules for Builder
  261 
  262 -- This function makes GHC understand that 'writeN' and 'ensureFree'
  263 -- are *not* recursive in the precense of the rewrite rules below.
  264 -- This is not needed with GHC 6.14+.
  265 append' :: Builder -> Builder -> Builder
  266 -- entered 9538 timesappend' (Builder f) (Builder g) = Builder (f . g)
  267 {-# INLINE append' #-}
  268 
  269 {-# RULES
  270 
  271 "append/writeN" forall a b (f::forall s. A.MArray s -> Int -> ST s ())
  272                            (g::forall s. A.MArray s -> Int -> ST s ()) ws.
  273         append (writeN a f) (append (writeN b g) ws) =
  274             append (writeN (a+b) (\marr o -> f marr o >> g marr (o+a))) ws
  275 
  276 "writeN/writeN" forall a b (f::forall s. A.MArray s -> Int -> ST s ())
  277                            (g::forall s. A.MArray s -> Int -> ST s ()).
  278         append (writeN a f) (writeN b g) =
  279             writeN (a+b) (\marr o -> f marr o >> g marr (o+a))
  280 
  281 "ensureFree/ensureFree" forall a b .
  282         append (ensureFree a) (ensureFree b) = ensureFree (max a b)
  283 
  284 "flush/flush"
  285         append flush flush = flush
  286 
  287  #-}