-- |
-- Module      : Data.Text.Lazy.Encoding
-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : portable
--
-- Functions for converting lazy 'Text' values to and from lazy
-- 'ByteString', using several standard encodings.
--
-- To make use of a much larger variety of encodings, use the @text-icu@
-- package.

module Data.Text.Lazy.Encoding
    (
    -- * Decoding ByteStrings to Text
      decodeASCII
    , decodeUtf8
    , decodeUtf16LE
    , decodeUtf16BE
    , decodeUtf32LE
    , decodeUtf32BE
    -- ** Controllable error handling
    , decodeUtf8With
    , decodeUtf16LEWith
    , decodeUtf16BEWith
    , decodeUtf32LEWith
    , decodeUtf32BEWith

    -- * Encoding Text to ByteStrings
    , encodeUtf8
    , encodeUtf16LE
    , encodeUtf16BE
    , encodeUtf32LE
    , encodeUtf32BE
    ) where

import qualified Data.ByteString.Lazy as B
import Data.Text.Encoding.Error (OnDecodeError, strictDecode)
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy.Fusion as F
import Data.Text.Lazy.Internal (Text(..), chunk, foldrChunks)
import qualified Data.Text.Lazy.Encoding.Fusion as E

-- | Decode a 'ByteString' containing 7-bit ASCII encoded text.
--
-- The input is processed chunk by chunk: every strict chunk of the lazy
-- 'ByteString' is decoded with the strict decoder from
-- "Data.Text.Encoding" and prepended to the result with 'chunk'.
decodeASCII :: B.ByteString -> Text
decodeASCII bs = foldr (chunk . TE.decodeASCII) Empty (B.toChunks bs)
{-# INLINE decodeASCII #-}

-- | Decode a 'ByteString' containing UTF-8 encoded text, using the
-- supplied 'OnDecodeError' handler to control error handling.
--
-- Implemented by composing @E.streamUtf8@ (bytes to stream) with
-- @F.unstream@ (stream to lazy 'Text').
decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
decodeUtf8With onErr bs = F.unstream (E.streamUtf8 onErr bs)
{-# INLINE decodeUtf8With #-}

-- | Decode a 'ByteString' containing UTF-8 encoded text.
decodeUtf8 :: B.ByteString -> Text
-- Fixes the error handler of 'decodeUtf8With' to 'strictDecode'.
decodeUtf8 = decodeUtf8With strictDecode
{-# INLINE decodeUtf8 #-}

-- | Encode text using UTF-8 encoding.
--
-- The lazy 'Text' is turned into a stream with @F.stream@, re-streamed
-- by @E.restreamUtf8@, and the result is materialised as a lazy
-- 'ByteString' by @E.unstream@.
encodeUtf8 :: Text -> B.ByteString
encodeUtf8 txt = E.unstream (E.restreamUtf8 (F.stream txt))
{-# INLINE encodeUtf8 #-}

-- | Decode text from little endian UTF-16 encoding, using the supplied
-- 'OnDecodeError' handler to control error handling.
decodeUtf16LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
{-# INLINE decodeUtf16LEWith #-}

-- | Decode text from little endian UTF-16 encoding.
--
-- This is 'decodeUtf16LEWith' with 'strictDecode' as the error handler.
decodeUtf16LE :: B.ByteString -> Text
decodeUtf16LE = decodeUtf16LEWith strictDecode
{-# INLINE decodeUtf16LE #-}

-- | Decode text from big endian UTF-16 encoding, using the supplied
-- 'OnDecodeError' handler to control error handling.
decodeUtf16BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
{-# INLINE decodeUtf16BEWith #-}

-- | Decode text from big endian UTF-16 encoding.
--
-- This is 'decodeUtf16BEWith' with 'strictDecode' as the error handler.
decodeUtf16BE :: B.ByteString -> Text
decodeUtf16BE = decodeUtf16BEWith strictDecode
{-# INLINE decodeUtf16BE #-}

-- | Encode text using little endian UTF-16 encoding.
--
-- Every strict chunk of the lazy 'Text' is encoded on its own with the
-- strict encoder from "Data.Text.Encoding"; the encoded pieces become
-- the chunks of the resulting lazy 'ByteString'.
encodeUtf16LE :: Text -> B.ByteString
encodeUtf16LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16LE) [] txt)
{-# INLINE encodeUtf16LE #-}

-- | Encode text using big endian UTF-16 encoding.
--
-- Every strict chunk of the lazy 'Text' is encoded on its own with the
-- strict encoder from "Data.Text.Encoding"; the encoded pieces become
-- the chunks of the resulting lazy 'ByteString'.
encodeUtf16BE :: Text -> B.ByteString
encodeUtf16BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16BE) [] txt)
{-# INLINE encodeUtf16BE #-}

-- | Decode text from little endian UTF-32 encoding, using the supplied
-- 'OnDecodeError' handler to control error handling.
decodeUtf32LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
{-# INLINE decodeUtf32LEWith #-}

-- | Decode text from little endian UTF-32 encoding.
decodeUtf32LE :: B.ByteString -> Text
-- Fixes the error handler of 'decodeUtf32LEWith' to 'strictDecode'.
decodeUtf32LE = decodeUtf32LEWith strictDecode
{-# INLINE decodeUtf32LE #-}

-- | Decode text from big endian UTF-32 encoding, using the supplied
-- 'OnDecodeError' handler to control error handling.
decodeUtf32BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
{-# INLINE decodeUtf32BEWith #-}

-- | Decode text from big endian UTF-32 encoding.
--
-- This is 'decodeUtf32BEWith' with 'strictDecode' as the error handler.
decodeUtf32BE :: B.ByteString -> Text
decodeUtf32BE = decodeUtf32BEWith strictDecode
{-# INLINE decodeUtf32BE #-}

-- | Encode text using little endian UTF-32 encoding.
--
-- Every strict chunk of the lazy 'Text' is encoded on its own with the
-- strict encoder from "Data.Text.Encoding"; the encoded pieces become
-- the chunks of the resulting lazy 'ByteString'.
encodeUtf32LE :: Text -> B.ByteString
encodeUtf32LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32LE) [] txt)
{-# INLINE encodeUtf32LE #-}

-- | Encode text using big endian UTF-32 encoding.
--
-- Every strict chunk of the lazy 'Text' is encoded on its own with the
-- strict encoder from "Data.Text.Encoding"; the encoded pieces become
-- the chunks of the resulting lazy 'ByteString'.
encodeUtf32BE :: Text -> B.ByteString
encodeUtf32BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32BE) [] txt)
{-# INLINE encodeUtf32BE #-}