1 {-# LANGUAGE BangPatterns, CPP #-} 2 -- | 3 -- Module : Data.Text.Foreign 4 -- Copyright : (c) 2009, 2010 Bryan O'Sullivan 5 -- 6 -- License : BSD-style 7 -- Maintainer : bos@serpentine.com, rtomharper@googlemail.com, 8 -- duncan@haskell.org 9 -- Stability : experimental 10 -- Portability : GHC 11 -- 12 -- Support for using 'Text' data with native code via the Haskell 13 -- foreign function interface. 14 15 module Data.Text.Foreign 16 ( 17 -- * Interoperability with native code 18 -- $interop 19 20 -- * Safe conversion functions 21 fromPtr 22 , useAsPtr 23 -- * Unsafe conversion code 24 , lengthWord16 25 , unsafeCopyToPtr 26 -- * Low-level manipulation 27 -- $lowlevel 28 , dropWord16 29 , takeWord16 30 ) where 31 32 #if defined(ASSERTS) 33 import Control.Exception (assert) 34 #endif 35 import Control.Monad.ST (unsafeIOToST) 36 import Data.Text.Internal (Text(..), empty) 37 import Data.Text.Unsafe (lengthWord16) 38 import qualified Data.Text.Array as A 39 import Data.Word (Word16) 40 import Foreign.Marshal.Alloc (allocaBytes) 41 import Foreign.Ptr (Ptr, castPtr, plusPtr) 42 import Foreign.Storable (peek, poke) 43 44 -- $interop 45 -- 46 -- The 'Text' type is implemented using arrays that are not guaranteed 47 -- to have a fixed address in the Haskell heap. All communication with 48 -- native code must thus occur by copying data back and forth. 49 -- 50 -- The 'Text' type's internal representation is UTF-16, using the 51 -- platform's native endianness. This makes copied data suitable for 52 -- use with native libraries that use a similar representation, such 53 -- as ICU. To interoperate with native libraries that use different 54 -- internal representations, such as UTF-8 or UTF-32, consider using 55 -- the functions in the 'Data.Text.Encoding' module. 56 57 -- | /O(n)/ Create a new 'Text' from a 'Ptr' 'Word16' by copying the 58 -- contents of the array. 59 fromPtr :: Ptr Word16 -- ^ source array 60 -> Int -- ^ length of source array (in 'Word16' units) 61 -> IO Text 62 -- entered 100 timesfromPtr _ 0 = return empty 63 fromPtr ptr len = 64 #if defined(ASSERTS) 65 assert (len > 0) $ 66 #endif 67 return $! Text arr 0 len 68 where 69 arr = A.run (A.unsafeNew len >>= copy) 70 copy marr = loop ptr 0 71 where 72 loop !p !i | i == len = return marr 73 | otherwise = do 74 A.unsafeWrite marr i =<< unsafeIOToST (peek p) 75 loop (p `plusPtr` 2) (i + 1) 76 77 -- $lowlevel 78 -- 79 -- Foreign functions that use UTF-16 internally may return indices in 80 -- units of 'Word16' instead of characters. These functions may 81 -- safely be used with such indices, as they will adjust offsets if 82 -- necessary to preserve the validity of a Unicode string. 83 84 -- | /O(1)/ Return the prefix of the 'Text' of @n@ 'Word16' units in 85 -- length. 86 -- 87 -- If @n@ would cause the 'Text' to end inside a surrogate pair, the 88 -- end of the prefix will be advanced by one additional 'Word16' unit 89 -- to maintain its validity. 90 takeWord16 :: Int -> Text -> Text 91 -- entered 200 timestakeWord16 n t@(Text arr off len) 92 | n <= 0 = empty 93 | n >= len || m >= len = t 94 | otherwise = Text arr off m 95 where 96 m | w < 0xDB00 || w > 0xD8FF = n 97 | otherwise = n+1 98 w = A.unsafeIndex arr (off+n-1) 99 100 -- | /O(1)/ Return the suffix of the 'Text', with @n@ 'Word16' units 101 -- dropped from its beginning. 102 -- 103 -- If @n@ would cause the 'Text' to begin inside a surrogate pair, the 104 -- beginning of the suffix will be advanced by one additional 'Word16' 105 -- unit to maintain its validity. 106 dropWord16 :: Int -> Text -> Text 107 -- entered 200 timesdropWord16 n t@(Text arr off len) 108 | n <= 0 = t 109 | n >= len || m >= len = empty 110 | otherwise = Text arr (off+m) (len-m) 111 where 112 m | w < 0xD800 || w > 0xDBFF = n 113 | otherwise = n+1 114 w = A.unsafeIndex arr (off+n-1) 115 116 -- | /O(n)/ Copy a 'Text' to an array. The array is assumed to be big 117 -- enough to hold the contents of the entire 'Text'. 118 unsafeCopyToPtr :: Text -> Ptr Word16 -> IO () 119 -- entered 100 timesunsafeCopyToPtr (Text arr off len) ptr = loop ptr off 120 where 121 end = off + len 122 loop !p !i | i == end = return () 123 | otherwise = do 124 poke p (A.unsafeIndex arr i) 125 loop (p `plusPtr` 2) (i + 1) 126 127 -- | /O(n)/ Perform an action on a temporary, mutable copy of a 128 -- 'Text'. The copy is freed as soon as the action returns. 129 useAsPtr :: Text -> (Ptr Word16 -> Int -> IO a) -> IO a 130 -- entered 100 timesuseAsPtr t@(Text _arr _off len) action = 131 allocaBytes (len * 2) $ \buf -> do 132 unsafeCopyToPtr t buf 133 action (castPtr buf) len