1 {-# LANGUAGE CPP, DeriveDataTypeable #-}
-- |
-- Module      : Data.Text.Encoding.Error
-- Copyright   : (c) Bryan O'Sullivan 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : GHC
--
-- Types and functions for dealing with encoding and decoding errors
-- in Unicode text.
--
-- The standard functions for encoding and decoding text are strict,
-- which is to say that they throw exceptions on invalid input.  This
-- is often unhelpful on real-world input, so alternative functions
-- exist that accept custom handlers for dealing with invalid inputs.
-- These 'OnError' handlers are normal Haskell functions.  You can use
-- one of the presupplied functions in this module, or you can write a
-- custom handler of your own.
   22 
   23 module Data.Text.Encoding.Error
   24     (
   25     -- * Error handling types
   26       UnicodeException(..)
   27     , OnError
   28     , OnDecodeError
   29     , OnEncodeError
   30     -- * Useful error handling functions
   31     , lenientDecode
   32     , strictDecode
   33     , strictEncode
   34     , ignore
   35     , replace
   36     ) where
   37 
   38 #if __GLASGOW_HASKELL__ >= 610
   39 import Control.Exception (Exception, throw)
   40 #else
   41 import Control.Exception.Extensible (Exception, throw)
   42 #endif
   43 import Data.Typeable (Typeable)
   44 import Data.Word (Word8)
   45 import Numeric (showHex)
   46 
   47 -- | Function type for handling a coding error.  It is supplied with
   48 -- two inputs:
   49 --
   50 -- * A 'String' that describes the error.
   51 --
   52 -- * The input value that caused the error.  If the error arose
   53 --   because the end of input was reached or could not be identified
   54 --   precisely, this value will be 'Nothing'.
   55 --
   56 -- If the handler returns a value wrapped with 'Just', that value will
   57 -- be used in the output as the replacement for the invalid input.  If
   58 -- it returns 'Nothing', no value will be used in the output.
   59 --
   60 -- Should the handler need to abort processing, it should use 'error'
   61 -- or 'throw' an exception (preferably a 'UnicodeException').  It may
   62 -- use the description provided to construct a more helpful error
   63 -- report.
   64 type OnError a b = String -> Maybe a -> Maybe b
   65 type OnDecodeError = OnError Word8 Char
   66 type OnEncodeError = OnError Char Word8
   67 
   68 -- | An exception type for representing Unicode encoding errors.
   69 data UnicodeException =
   70     DecodeError String (Maybe Word8)
   71     -- ^ Could not decode a byte sequence because it was invalid under
   72     -- the given encoding, or ran out of input in mid-decode.
   73   | EncodeError String (Maybe Char)
   74     -- ^ Tried to encode a character that could not be represented
   75     -- under the given encoding, or ran out of input in mid-encode.
   76     deriving (-- entered 64 timesTypeable)
   77 
   78 showUnicodeException :: UnicodeException -> String
   79 -- entered 32 timesshowUnicodeException (DecodeError desc (Just w))
   80     = "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc)
   81 showUnicodeException (DecodeError desc Nothing)
   82     = "Cannot decode input: " ++ desc
   83 showUnicodeException (EncodeError desc (Just c))
   84     = "Cannot encode character '\\x" ++ showHex (fromEnum c) ("': " ++ desc)
   85 showUnicodeException (EncodeError desc Nothing)
   86     = "Cannot encode input: " ++ desc
   87                      
   88 instance Show UnicodeException where
   89     -- entered onceshow = showUnicodeException
   90 
   91 instance Exception UnicodeException
   92 
   93 -- | Throw a 'UnicodeException' if decoding fails.
   94 strictDecode :: OnError Word8 Char
   95 -- entered 32 timesstrictDecode desc c = throw (DecodeError desc c)
   96 
   97 -- | Replace an invalid input byte with the Unicode replacement
   98 -- character U+FFFD.
   99 lenientDecode :: OnError Word8 Char
  100 -- entered 236 timeslenientDecode _ _ = Just '\xfffd'
  101 
  102 -- | Throw a 'UnicodeException' if encoding fails.
  103 strictEncode :: OnError Char Word8
  104 -- never enteredstrictEncode desc c = throw (EncodeError desc c)
  105 
  106 -- | Ignore an invalid input, substituting nothing in the output.
  107 ignore :: OnError a b
  108 -- entered 188 timesignore _ _ = Nothing
  109 
  110 -- | Replace an invalid input with a valid output.
  111 replace :: b -> OnError a b
  112 -- never enteredreplace c _ _ = Just c