{-# LANGUAGE CPP, DeriveDataTypeable #-}
-- |
-- Module      : Data.Text.Encoding.Error
-- Copyright   : (c) Bryan O'Sullivan 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : GHC
--
-- Types and functions for dealing with encoding and decoding errors
-- in Unicode text.
--
-- The standard functions for encoding and decoding text are strict,
-- which is to say that they throw exceptions on invalid input.  This
-- is often unhelpful on real world input, so alternative functions
-- exist that accept custom handlers for dealing with invalid inputs.
-- These 'OnError' handlers are normal Haskell functions.  You can use
-- one of the presupplied functions in this module, or you can write a
-- custom handler of your own.

module Data.Text.Encoding.Error
    (
    -- * Error handling types
      UnicodeException(..)
    , OnError
    , OnDecodeError
    , OnEncodeError
    -- * Useful error handling functions
    , lenientDecode
    , strictDecode
    , strictEncode
    , ignore
    , replace
    ) where

#if __GLASGOW_HASKELL__ >= 610
import Control.Exception (Exception, throw)
#else
import Control.Exception.Extensible (Exception, throw)
#endif
import Data.Typeable (Typeable)
import Data.Word (Word8)
import Numeric (showHex)

-- | Function type for handling a coding error.  It is supplied with
-- two inputs:
--
-- * A 'String' that describes the error.
--
-- * The input value that caused the error.  If the error arose
--   because the end of input was reached or could not be identified
--   precisely, this value will be 'Nothing'.
--
-- If the handler returns a value wrapped with 'Just', that value will
-- be used in the output as the replacement for the invalid input.  If
-- it returns 'Nothing', no value will be used in the output.
--
-- Should the handler need to abort processing, it should use 'error'
-- or 'throw' an exception (preferably a 'UnicodeException').  It may
-- use the description provided to construct a more helpful error
-- report.
type OnError a b = String -> Maybe a -> Maybe b

-- | A handler for a decoding error: the offending input is a byte and
-- the replacement, if any, is a 'Char'.
type OnDecodeError = OnError Word8 Char

-- | A handler for an encoding error: the offending input is a 'Char'
-- and the replacement, if any, is a byte.
type OnEncodeError = OnError Char Word8

-- | An exception type for representing Unicode encoding errors.
data UnicodeException =
    DecodeError String (Maybe Word8)
    -- ^ Could not decode a byte sequence because it was invalid under
    -- the given encoding, or ran out of input in mid-decode.
  | EncodeError String (Maybe Char)
    -- ^ Tried to encode a character that could not be represented
    -- under the given encoding, or ran out of input in mid-encode.
    deriving (Typeable)

-- | Render a 'UnicodeException' as a human-readable message.  When the
-- offending byte or character is known it is shown in hexadecimal
-- after a literal @\\x@ prefix; the description supplied to the
-- constructor always follows.
showUnicodeException :: UnicodeException -> String
showUnicodeException (DecodeError desc (Just w))
    = "Cannot decode byte '\\x" ++ showHex w ("': " ++ desc)
showUnicodeException (DecodeError desc Nothing)
    = "Cannot decode input: " ++ desc
showUnicodeException (EncodeError desc (Just c))
    = "Cannot encode character '\\x" ++ showHex (fromEnum c) ("': " ++ desc)
showUnicodeException (EncodeError desc Nothing)
    = "Cannot encode input: " ++ desc

-- | 'show' yields the message built by 'showUnicodeException' rather
-- than a constructor-style rendering.
instance Show UnicodeException where
    show = showUnicodeException

-- | Allows a 'UnicodeException' to be thrown with 'throw'.
instance Exception UnicodeException

-- | Throw a 'UnicodeException' if decoding fails.
strictDecode :: OnDecodeError
strictDecode desc c = throw (DecodeError desc c)

-- | Replace an invalid input byte with the Unicode replacement
-- character U+FFFD.
lenientDecode :: OnDecodeError
lenientDecode _ _ = Just '\xfffd'

-- | Throw a 'UnicodeException' if encoding fails.
strictEncode :: OnEncodeError
strictEncode desc c = throw (EncodeError desc c)

-- | Ignore an invalid input, substituting nothing in the output.
ignore :: OnError a b
ignore _ _ = Nothing

-- | Replace an invalid input with a valid output.
replace :: b -> OnError a b
replace c _ _ = Just c