1 {-# LANGUAGE BangPatterns #-} 2 3 -- | 4 -- Module : Data.Text.Encoding.Fusion.Common 5 -- Copyright : (c) Tom Harper 2008-2009, 6 -- (c) Bryan O'Sullivan 2009, 7 -- (c) Duncan Coutts 2009 8 -- 9 -- License : BSD-style 10 -- Maintainer : bos@serpentine.com, rtomharper@googlemail.com, 11 -- duncan@haskell.org 12 -- Stability : experimental 13 -- Portability : portable 14 -- 15 -- Fusible 'Stream'-oriented functions for converting between 'Text' 16 -- and several common encodings. 17 18 module Data.Text.Encoding.Fusion.Common 19 ( 20 -- * Restreaming 21 -- Restreaming is the act of converting from one 'Stream' 22 -- representation to another. 23 restreamUtf8 24 , restreamUtf16LE 25 , restreamUtf16BE 26 , restreamUtf32LE 27 , restreamUtf32BE 28 ) where 29 30 import Data.Bits ((.&.)) 31 import Data.Text.Fusion (Step(..), Stream(..)) 32 import Data.Text.Fusion.Internal (M(..), S(..)) 33 import Data.Text.UnsafeChar (ord) 34 import Data.Text.UnsafeShift (shiftR) 35 import Data.Word (Word8) 36 import qualified Data.Text.Encoding.Utf8 as U8 37 38 -- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8. 39 restreamUtf8 :: Stream Char -> Stream Word8 40 -- entered 400 timesrestreamUtf8 (Stream next0 s0 len) = 41 Stream next (S s0 N N N) (len*2) 42 where 43 {-# INLINE next #-} 44 next (S s N N N) = case next0 s of 45 Done -> Done 46 Skip s' -> Skip (S s' N N N) 47 Yield x xs 48 | n <= 0x7F -> Yield c (S xs N N N) 49 | n <= 0x07FF -> Yield a2 (S xs (J b2) N N) 50 | n <= 0xFFFF -> Yield a3 (S xs (J b3) (J c3) N) 51 | otherwise -> Yield a4 (S xs (J b4) (J c4) (J d4)) 52 where 53 n = ord x 54 c = fromIntegral n 55 (a2,b2) = U8.ord2 x 56 (a3,b3,c3) = U8.ord3 x 57 (a4,b4,c4,d4) = U8.ord4 x 58 next (S s (J x2) N N) = Yield x2 (S s N N N) 59 next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) 60 next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) 61 next _ = internalError "restreamUtf8" 62 {-# INLINE restreamUtf8 #-} 63 64 restreamUtf16BE :: Stream Char -> Stream Word8 65 -- entered 197 timesrestreamUtf16BE (Stream next0 s0 len) = 66 Stream next (S s0 N N N) (len*2) 67 where 68 {-# INLINE next #-} 69 next (S s N N N) = case next0 s of 70 Done -> Done 71 Skip s' -> Skip (S s' N N N) 72 Yield x xs 73 | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $ 74 S xs (J $ fromIntegral n) N N 75 | otherwise -> Yield c1 $ 76 S xs (J c2) (J c3) (J c4) 77 where 78 n = ord x 79 n1 = n - 0x10000 80 c1 = fromIntegral (n1 `shiftR` 18 + 0xD8) 81 c2 = fromIntegral (n1 `shiftR` 10) 82 n2 = n1 .&. 0x3FF 83 c3 = fromIntegral (n2 `shiftR` 8 + 0xDC) 84 c4 = fromIntegral n2 85 next (S s (J x2) N N) = Yield x2 (S s N N N) 86 next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) 87 next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) 88 next _ = internalError "restreamUtf16BE" 89 {-# INLINE restreamUtf16BE #-} 90 91 restreamUtf16LE :: Stream Char -> Stream Word8 92 -- entered 196 timesrestreamUtf16LE (Stream next0 s0 len) = 93 Stream next (S s0 N N N) (len*2) 94 where 95 {-# INLINE next #-} 96 next (S s N N N) = case next0 s of 97 Done -> Done 98 Skip s' -> Skip (S s' N N N) 99 Yield x xs 100 | n < 0x10000 -> Yield (fromIntegral n) $ 101 S xs (J (fromIntegral $ shiftR n 8)) N N 102 | otherwise -> Yield c1 $ 103 S xs (J c2) (J c3) (J c4) 104 where 105 n = ord x 106 n1 = n - 0x10000 107 c2 = fromIntegral (shiftR n1 18 + 0xD8) 108 c1 = fromIntegral (shiftR n1 10) 109 n2 = n1 .&. 0x3FF 110 c4 = fromIntegral (shiftR n2 8 + 0xDC) 111 c3 = fromIntegral n2 112 next (S s (J x2) N N) = Yield x2 (S s N N N) 113 next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) 114 next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) 115 next _ = internalError "restreamUtf16LE" 116 {-# INLINE restreamUtf16LE #-} 117 118 restreamUtf32BE :: Stream Char -> Stream Word8 119 -- entered 194 timesrestreamUtf32BE (Stream next0 s0 len) = 120 Stream next (S s0 N N N) (len*2) 121 where 122 {-# INLINE next #-} 123 next (S s N N N) = case next0 s of 124 Done -> Done 125 Skip s' -> Skip (S s' N N N) 126 Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4)) 127 where 128 n = ord x 129 c1 = fromIntegral $ shiftR n 24 130 c2 = fromIntegral $ shiftR n 16 131 c3 = fromIntegral $ shiftR n 8 132 c4 = fromIntegral n 133 next (S s (J x2) N N) = Yield x2 (S s N N N) 134 next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) 135 next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) 136 next _ = internalError "restreamUtf32BE" 137 {-# INLINE restreamUtf32BE #-} 138 139 restreamUtf32LE :: Stream Char -> Stream Word8 140 -- entered 194 timesrestreamUtf32LE (Stream next0 s0 len) = 141 Stream next (S s0 N N N) (len*2) 142 where 143 {-# INLINE next #-} 144 next (S s N N N) = case next0 s of 145 Done -> Done 146 Skip s' -> Skip (S s' N N N) 147 Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4)) 148 where 149 n = ord x 150 c4 = fromIntegral $ shiftR n 24 151 c3 = fromIntegral $ shiftR n 16 152 c2 = fromIntegral $ shiftR n 8 153 c1 = fromIntegral n 154 next (S s (J x2) N N) = Yield x2 (S s N N N) 155 next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) 156 next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) 157 next _ = internalError "restreamUtf32LE" 158 {-# INLINE restreamUtf32LE #-} 159 160 internalError :: String -> a 161 -- never enteredinternalError func = 162 error $ "Data.Text.Encoding.Fusion.Common." ++ func ++ ": internal error"