1 {-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
    2 -- |
    3 -- Module      : Data.Text.Unsafe
    4 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
    5 -- License     : BSD-style
    6 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
    7 --               duncan@haskell.org
    8 -- Stability   : experimental
    9 -- Portability : portable
   10 --
   11 -- A module containing unsafe 'Text' operations, for very very careful
   12 -- use in heavily tested code.
   13 module Data.Text.Unsafe
   14     (
   15       inlineInterleaveST
   16     , inlinePerformIO
   17     , Iter(..)
   18     , iter
   19     , iter_
   20     , reverseIter
   21     , unsafeHead
   22     , unsafeTail
   23     , lengthWord16
   24     ) where
   25      
   26 #if defined(ASSERTS)
   27 import Control.Exception (assert)
   28 #endif
   29 import Data.Text.Encoding.Utf16 (chr2)
   30 import Data.Text.Internal (Text(..))
   31 import Data.Text.UnsafeChar (unsafeChr)
   32 import GHC.ST (ST(..))
   33 import qualified Data.Text.Array as A
   34 #if defined(__GLASGOW_HASKELL__)
   35 # if __GLASGOW_HASKELL__ >= 611
   36 import GHC.IO (IO(IO))
   37 # else
   38 import GHC.IOBase (IO(IO))
   39 # endif
   40 import GHC.Base (realWorld#)
   41 #endif
   42 
   43 -- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'
   44 -- omits the check for the empty case, so there is an obligation on
   45 -- the programmer to provide a proof that the 'Text' is non-empty.
   46 unsafeHead :: Text -> Char
   47 -- entered 27,538 timesunsafeHead (Text arr off _len)
   48     | m < 0xD800 || m > 0xDBFF = unsafeChr m
   49     | otherwise                = chr2 m n
   50     where m = A.unsafeIndex arr off
   51           n = A.unsafeIndex arr (off+1)
   52 {-# INLINE unsafeHead #-}
   53 
   54 -- | /O(1)/ A variant of 'tail' for non-empty 'Text'. 'unsafeHead'
   55 -- omits the check for the empty case, so there is an obligation on
   56 -- the programmer to provide a proof that the 'Text' is non-empty.
   57 unsafeTail :: Text -> Text
   58 -- entered 109,112 timesunsafeTail t@(Text arr off len) =
   59 #if defined(ASSERTS)
   60     assert (d <= len) $
   61 #endif
   62     Text arr (off+d) (len-d)
   63   where d = iter_ t 0
   64 {-# INLINE unsafeTail #-}
   65 
   66 data Iter = Iter {-# UNPACK #-} !Char {-# UNPACK #-} !Int
   67 
   68 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
   69 -- array, returning the current character and the delta to add to give
   70 -- the next offset to iterate at.
   71 iter :: Text -> Int -> Iter
   72 -- entered 2,761,610 timesiter (Text arr off _len) i
   73     | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
   74     | otherwise                = Iter (chr2 m n) 2
   75   where m = A.unsafeIndex arr j
   76         n = A.unsafeIndex arr k
   77         j = off + i
   78         k = j + 1
   79 {-# INLINE iter #-}
   80 
   81 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
   82 -- delta to add to give the next offset to iterate at.
   83 iter_ :: Text -> Int -> Int
   84 -- entered 287,130 timesiter_ (Text arr off _len) i | m < 0xD800 || m > 0xDBFF = 1
   85                             | otherwise                = 2
   86   where m = A.unsafeIndex arr (off+i)
   87 {-# INLINE iter_ #-}
   88 
   89 -- | /O(1)/ Iterate one step backwards through a UTF-16 array,
   90 -- returning the current character and the delta to add (i.e. a
   91 -- negative number) to give the next offset to iterate at.
   92 reverseIter :: Text -> Int -> (Char,Int)
   93 -- entered 3466 timesreverseIter (Text arr off _len) i
   94     | m < 0xDC00 || m > 0xDFFF = (unsafeChr m, -1)
   95     | otherwise                = (chr2 n m,    -2)
   96   where m = A.unsafeIndex arr j
   97         n = A.unsafeIndex arr k
   98         j = off + i
   99         k = j - 1
  100 {-# INLINE reverseIter #-}
  101 
  102 -- | Just like unsafePerformIO, but we inline it. Big performance gains as
  103 -- it exposes lots of things to further inlining. /Very unsafe/. In
  104 -- particular, you should do no memory allocation inside an
  105 -- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
  106 --
  107 {-# INLINE inlinePerformIO #-}
  108 inlinePerformIO :: IO a -> a
  109 #if defined(__GLASGOW_HASKELL__)
  110 -- entered 138,212 timesinlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
  111 #else
  112 inlinePerformIO = unsafePerformIO
  113 #endif
  114 
  115 -- | Allow an 'ST' computation to be deferred lazily. When passed an
  116 -- action of type 'ST' @s@ @a@, the action will only be performed when
  117 -- the value of @a@ is demanded.
  118 --
  119 -- This function is identical to the normal unsafeInterleaveST, but is
  120 -- inlined and hence faster.
  121 --
  122 -- /Note/: This operation is highly unsafe, as it can introduce
  123 -- externally visible non-determinism into an 'ST' action.
  124 inlineInterleaveST :: ST s a -> ST s a
  125 -- entered 1497 timesinlineInterleaveST (ST m) = ST $ \ s ->
  126     let r = case m s of (# _, res #) -> res in (# s, r #)
  127 {-# INLINE inlineInterleaveST #-}
  128 
  129 -- | /O(1)/ Return the length of a 'Text' in units of 'Word16'.  This
  130 -- is useful for sizing a target array appropriately before using
  131 -- 'unsafeCopyToPtr'.
  132 lengthWord16 :: Text -> Int
  133 -- entered 39,502 timeslengthWord16 (Text _arr _off len) = len
  134 {-# INLINE lengthWord16 #-}