1 {-# LANGUAGE BangPatterns #-}
    2 
    3 -- |
    4 -- Module      : Data.Text.Encoding.Fusion.Common
    5 -- Copyright   : (c) Tom Harper 2008-2009,
    6 --               (c) Bryan O'Sullivan 2009,
    7 --               (c) Duncan Coutts 2009
    8 --
    9 -- License     : BSD-style
   10 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
   11 --               duncan@haskell.org
   12 -- Stability   : experimental
   13 -- Portability : portable
   14 --
   15 -- Fusible 'Stream'-oriented functions for converting between 'Text'
   16 -- and several common encodings.
   17 
   18 module Data.Text.Encoding.Fusion.Common
   19     (
   20     -- * Restreaming
   21     -- Restreaming is the act of converting from one 'Stream'
   22     -- representation to another.
   23       restreamUtf8
   24     , restreamUtf16LE
   25     , restreamUtf16BE
   26     , restreamUtf32LE
   27     , restreamUtf32BE
   28     ) where
   29 
   30 import Data.Bits ((.&.))
   31 import Data.Text.Fusion (Step(..), Stream(..))
   32 import Data.Text.Fusion.Internal (M(..), S(..))
   33 import Data.Text.UnsafeChar (ord)
   34 import Data.Text.UnsafeShift (shiftR)
   35 import Data.Word (Word8)
   36 import qualified Data.Text.Encoding.Utf8 as U8
   37 
   38 -- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
   39 restreamUtf8 :: Stream Char -> Stream Word8
   40 -- entered 400 timesrestreamUtf8 (Stream next0 s0 len) =
   41     Stream next (S s0 N N N) (len*2)
   42     where
   43       {-# INLINE next #-}
   44       next (S s N N N) = case next0 s of
   45                   Done              -> Done
   46                   Skip s'           -> Skip (S s' N N N)
   47                   Yield x xs
   48                       | n <= 0x7F   -> Yield c  (S xs N N N)
   49                       | n <= 0x07FF -> Yield a2 (S xs (J b2) N N)
   50                       | n <= 0xFFFF -> Yield a3 (S xs (J b3) (J c3) N)
   51                       | otherwise   -> Yield a4 (S xs (J b4) (J c4) (J d4))
   52                       where
   53                         n  = ord x
   54                         c  = fromIntegral n
   55                         (a2,b2) = U8.ord2 x
   56                         (a3,b3,c3) = U8.ord3 x
   57                         (a4,b4,c4,d4) = U8.ord4 x
   58       next (S s (J x2) N N)   = Yield x2 (S s N N N)
   59       next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
   60       next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
   61       next _ = internalError "restreamUtf8"
   62 {-# INLINE restreamUtf8 #-}
   63 
   64 restreamUtf16BE :: Stream Char -> Stream Word8
   65 -- entered 197 timesrestreamUtf16BE (Stream next0 s0 len) =
   66     Stream next (S s0 N N N) (len*2)
   67     where
   68       {-# INLINE next #-}
   69       next (S s N N N) = case next0 s of
   70           Done -> Done
   71           Skip s' -> Skip (S s' N N N)
   72           Yield x xs
   73               | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
   74                                S xs (J $ fromIntegral n) N N
   75               | otherwise   -> Yield c1 $
   76                                S xs (J c2) (J c3) (J c4)
   77               where
   78                 n  = ord x
   79                 n1 = n - 0x10000
   80                 c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
   81                 c2 = fromIntegral (n1 `shiftR` 10)
   82                 n2 = n1 .&. 0x3FF
   83                 c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
   84                 c4 = fromIntegral n2
   85       next (S s (J x2) N N)   = Yield x2 (S s N N N)
   86       next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
   87       next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
   88       next _ = internalError "restreamUtf16BE"
   89 {-# INLINE restreamUtf16BE #-}
   90 
   91 restreamUtf16LE :: Stream Char -> Stream Word8
   92 -- entered 196 timesrestreamUtf16LE (Stream next0 s0 len) =
   93     Stream next (S s0 N N N) (len*2)
   94     where
   95       {-# INLINE next #-}
   96       next (S s N N N) = case next0 s of
   97           Done -> Done
   98           Skip s' -> Skip (S s' N N N)
   99           Yield x xs
  100               | n < 0x10000 -> Yield (fromIntegral n) $
  101                                S xs (J (fromIntegral $ shiftR n 8)) N N
  102               | otherwise   -> Yield c1 $
  103                                S xs (J c2) (J c3) (J c4)
  104               where
  105                 n  = ord x
  106                 n1 = n - 0x10000
  107                 c2 = fromIntegral (shiftR n1 18 + 0xD8)
  108                 c1 = fromIntegral (shiftR n1 10)
  109                 n2 = n1 .&. 0x3FF
  110                 c4 = fromIntegral (shiftR n2 8 + 0xDC)
  111                 c3 = fromIntegral n2
  112       next (S s (J x2) N N)   = Yield x2 (S s N N N)
  113       next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
  114       next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
  115       next _ = internalError "restreamUtf16LE"
  116 {-# INLINE restreamUtf16LE #-}
  117 
  118 restreamUtf32BE :: Stream Char -> Stream Word8
  119 -- entered 194 timesrestreamUtf32BE (Stream next0 s0 len) =
  120     Stream next (S s0 N N N) (len*2)
  121     where
  122     {-# INLINE next #-}
  123     next (S s N N N) = case next0 s of
  124         Done       -> Done
  125         Skip s'    -> Skip (S s' N N N)
  126         Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
  127           where
  128             n  = ord x
  129             c1 = fromIntegral $ shiftR n 24
  130             c2 = fromIntegral $ shiftR n 16
  131             c3 = fromIntegral $ shiftR n 8
  132             c4 = fromIntegral n
  133     next (S s (J x2) N N) = Yield x2 (S s N N N)
  134     next (S s (J x2) x3 N)      = Yield x2 (S s x3 N N)
  135     next (S s (J x2) x3 x4)           = Yield x2 (S s x3 x4 N)
  136     next _ = internalError "restreamUtf32BE"
  137 {-# INLINE restreamUtf32BE #-}
  138 
  139 restreamUtf32LE :: Stream Char -> Stream Word8
  140 -- entered 194 timesrestreamUtf32LE (Stream next0 s0 len) =
  141     Stream next (S s0 N N N) (len*2)
  142     where
  143     {-# INLINE next #-}
  144     next (S s N N N) = case next0 s of
  145         Done       -> Done
  146         Skip s'    -> Skip (S s' N N N)
  147         Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
  148           where
  149             n  = ord x
  150             c4 = fromIntegral $ shiftR n 24
  151             c3 = fromIntegral $ shiftR n 16
  152             c2 = fromIntegral $ shiftR n 8
  153             c1 = fromIntegral n
  154     next (S s (J x2) N N)   = Yield x2 (S s N N N)
  155     next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
  156     next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
  157     next _ = internalError "restreamUtf32LE"
  158 {-# INLINE restreamUtf32LE #-}
  159 
  160 internalError :: String -> a
  161 -- never enteredinternalError func =
  162     error $ "Data.Text.Encoding.Fusion.Common." ++ func ++ ": internal error"