1 {-# LANGUAGE CPP, MagicHash, UnboxedTuples #-} 2 -- | 3 -- Module : Data.Text.Unsafe 4 -- Copyright : (c) 2009, 2010 Bryan O'Sullivan 5 -- License : BSD-style 6 -- Maintainer : bos@serpentine.com, rtomharper@googlemail.com, 7 -- duncan@haskell.org 8 -- Stability : experimental 9 -- Portability : portable 10 -- 11 -- A module containing unsafe 'Text' operations, for very very careful 12 -- use in heavily tested code. 13 module Data.Text.Unsafe 14 ( 15 inlineInterleaveST 16 , inlinePerformIO 17 , Iter(..) 18 , iter 19 , iter_ 20 , reverseIter 21 , unsafeHead 22 , unsafeTail 23 , lengthWord16 24 ) where 25 26 #if defined(ASSERTS) 27 import Control.Exception (assert) 28 #endif 29 import Data.Text.Encoding.Utf16 (chr2) 30 import Data.Text.Internal (Text(..)) 31 import Data.Text.UnsafeChar (unsafeChr) 32 import GHC.ST (ST(..)) 33 import qualified Data.Text.Array as A 34 #if defined(__GLASGOW_HASKELL__) 35 # if __GLASGOW_HASKELL__ >= 611 36 import GHC.IO (IO(IO)) 37 # else 38 import GHC.IOBase (IO(IO)) 39 # endif 40 import GHC.Base (realWorld#) 41 #endif 42 43 -- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead' 44 -- omits the check for the empty case, so there is an obligation on 45 -- the programmer to provide a proof that the 'Text' is non-empty. 46 unsafeHead :: Text -> Char 47 -- entered 27,538 timesunsafeHead (Text arr off _len) 48 | m < 0xD800 || m > 0xDBFF = unsafeChr m 49 | otherwise = chr2 m n 50 where m = A.unsafeIndex arr off 51 n = A.unsafeIndex arr (off+1) 52 {-# INLINE unsafeHead #-} 53 54 -- | /O(1)/ A variant of 'tail' for non-empty 'Text'. 'unsafeHead' 55 -- omits the check for the empty case, so there is an obligation on 56 -- the programmer to provide a proof that the 'Text' is non-empty. 57 unsafeTail :: Text -> Text 58 -- entered 109,112 timesunsafeTail t@(Text arr off len) = 59 #if defined(ASSERTS) 60 assert (d <= len) $ 61 #endif 62 Text arr (off+d) (len-d) 63 where d = iter_ t 0 64 {-# INLINE unsafeTail #-} 65 66 data Iter = Iter {-# UNPACK #-} !Char {-# UNPACK #-} !Int 67 68 -- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16 69 -- array, returning the current character and the delta to add to give 70 -- the next offset to iterate at. 71 iter :: Text -> Int -> Iter 72 -- entered 2,761,610 timesiter (Text arr off _len) i 73 | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1 74 | otherwise = Iter (chr2 m n) 2 75 where m = A.unsafeIndex arr j 76 n = A.unsafeIndex arr k 77 j = off + i 78 k = j + 1 79 {-# INLINE iter #-} 80 81 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the 82 -- delta to add to give the next offset to iterate at. 83 iter_ :: Text -> Int -> Int 84 -- entered 287,130 timesiter_ (Text arr off _len) i | m < 0xD800 || m > 0xDBFF = 1 85 | otherwise = 2 86 where m = A.unsafeIndex arr (off+i) 87 {-# INLINE iter_ #-} 88 89 -- | /O(1)/ Iterate one step backwards through a UTF-16 array, 90 -- returning the current character and the delta to add (i.e. a 91 -- negative number) to give the next offset to iterate at. 92 reverseIter :: Text -> Int -> (Char,Int) 93 -- entered 3466 timesreverseIter (Text arr off _len) i 94 | m < 0xDC00 || m > 0xDFFF = (unsafeChr m, -1) 95 | otherwise = (chr2 n m, -2) 96 where m = A.unsafeIndex arr j 97 n = A.unsafeIndex arr k 98 j = off + i 99 k = j - 1 100 {-# INLINE reverseIter #-} 101 102 -- | Just like unsafePerformIO, but we inline it. Big performance gains as 103 -- it exposes lots of things to further inlining. /Very unsafe/. In 104 -- particular, you should do no memory allocation inside an 105 -- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@. 106 -- 107 {-# INLINE inlinePerformIO #-} 108 inlinePerformIO :: IO a -> a 109 #if defined(__GLASGOW_HASKELL__) 110 -- entered 138,212 timesinlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r 111 #else 112 inlinePerformIO = unsafePerformIO 113 #endif 114 115 -- | Allow an 'ST' computation to be deferred lazily. When passed an 116 -- action of type 'ST' @s@ @a@, the action will only be performed when 117 -- the value of @a@ is demanded. 118 -- 119 -- This function is identical to the normal unsafeInterleaveST, but is 120 -- inlined and hence faster. 121 -- 122 -- /Note/: This operation is highly unsafe, as it can introduce 123 -- externally visible non-determinism into an 'ST' action. 124 inlineInterleaveST :: ST s a -> ST s a 125 -- entered 1497 timesinlineInterleaveST (ST m) = ST $ \ s -> 126 let r = case m s of (# _, res #) -> res in (# s, r #) 127 {-# INLINE inlineInterleaveST #-} 128 129 -- | /O(1)/ Return the length of a 'Text' in units of 'Word16'. This 130 -- is useful for sizing a target array appropriately before using 131 -- 'unsafeCopyToPtr'. 132 lengthWord16 :: Text -> Int 133 -- entered 39,502 timeslengthWord16 (Text _arr _off len) = len 134 {-# INLINE lengthWord16 #-}