1 {-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-}
    2 -- |
    3 -- Module      : Data.Text.IO
    4 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan,
    5 --               (c) 2009 Simon Marlow
    6 -- License     : BSD-style
    7 -- Maintainer  : bos@serpentine.com
    8 -- Stability   : experimental
    9 -- Portability : GHC
   10 --
   11 -- Efficient locale-sensitive support for text I\/O.
   12 
   13 module Data.Text.IO
   14     (
   15     -- * Locale support
   16     -- $locale
   17     -- * File-at-a-time operations
   18       readFile
   19     , writeFile
   20     , appendFile
   21     -- * Operations on handles
   22     , hGetContents
   23     , hGetLine
   24     , hPutStr
   25     , hPutStrLn
   26     -- * Special cases for standard input and output
   27     , interact
   28     , getContents
   29     , getLine
   30     , putStr
   31     , putStrLn
   32     ) where
   33 
   34 import Data.Text (Text)
   35 import Prelude hiding (appendFile, catch, getContents, getLine, interact,
   36                        putStr, putStrLn, readFile, writeFile)
   37 import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout,
   38                   withFile)
   39 #if __GLASGOW_HASKELL__ <= 610
   40 import qualified Data.ByteString.Char8 as B
   41 import Data.Text.Encoding (decodeUtf8, encodeUtf8)
   42 #else
   43 import Control.Exception (catch, throw)
   44 import Control.Monad (liftM2, when)
   45 import Data.IORef (readIORef, writeIORef)
   46 import qualified Data.Text as T
   47 import Data.Text.Fusion (stream)
   48 import Data.Text.Fusion.Internal (Step(..), Stream(..))
   49 import Data.Text.IO.Internal (hGetLineWith, readChunk)
   50 import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBufElem, CharBuffer,
   51                       RawCharBuffer, emptyBuffer, isEmptyBuffer, newCharBuffer,
   52                       writeCharBuf)
   53 import GHC.IO.Exception (IOException(ioe_type), IOErrorType(InappropriateType))
   54 import GHC.IO.Handle.Internals (augmentIOError, hClose_help, wantReadableHandle,
   55                                 wantWritableHandle)
   56 import GHC.IO.Handle.Text (commitBuffer')
   57 import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..),
   58                             HandleType(..), Newline(..))
   59 import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell)
   60 import System.IO.Error (isEOFError)
   61 #endif
   62 
   63 -- | The 'readFile' function reads a file and returns the contents of
   64 -- the file as a string.  The entire file is read strictly, as with
   65 -- 'getContents'.
   66 readFile :: FilePath -> IO Text
   67 -- never enteredreadFile name = openFile name ReadMode >>= hGetContents
   68 
   69 -- | Write a string to a file.  The file is truncated to zero length
   70 -- before writing begins.
   71 writeFile :: FilePath -> Text -> IO ()
   72 -- never enteredwriteFile p = withFile p WriteMode . flip hPutStr
   73 
   74 -- | Write a string the end of a file.
   75 appendFile :: FilePath -> Text -> IO ()
   76 -- never enteredappendFile p = withFile p AppendMode . flip hPutStr
   77 
   78 -- | Read the remaining contents of a 'Handle' as a string.  The
   79 -- 'Handle' is closed once the contents have been read, or if an
   80 -- exception is thrown.
   81 --
   82 -- Internally, this function reads a chunk at a time from the
   83 -- lower-level buffering abstraction, and concatenates the chunks into
   84 -- a single string once the entire file has been read.
   85 --
   86 -- As a result, it requires approximately twice as much memory as its
   87 -- result to construct its result.  For files more than a half of
   88 -- available RAM in size, this may result in memory exhaustion.
   89 hGetContents :: Handle -> IO Text
   90 #if __GLASGOW_HASKELL__ <= 610
   91 hGetContents = fmap decodeUtf8 . B.hGetContents
   92 #else
   93 -- entered 100 timeshGetContents h = do
   94   chooseGoodBuffering h
   95   wantReadableHandle "hGetContents" h readAll
   96  where
   97   readAll hh@Handle__{..} = do
   98     let catchError e
   99           | isEOFError e = do
  100               buf <- readIORef haCharBuffer
  101               return $ if isEmptyBuffer buf
  102                        then T.empty
  103                        else T.singleton '\r'
  104           | otherwise = throw (augmentIOError e "hGetContents" h)
  105         readChunks = do
  106           buf <- readIORef haCharBuffer
  107           t <- readChunk hh buf `catch` catchError
  108           if T.null t
  109             then return [t]
  110             else (t:) `fmap` readChunks
  111     ts <- readChunks
  112     (hh', _) <- hClose_help hh
  113     return (hh'{haType=ClosedHandle}, T.concat ts)
  114 #endif
  115   
  116 -- | Use a more efficient buffer size if we're reading in
  117 -- block-buffered mode with the default buffer size.  When we can
  118 -- determine the size of the handle we're reading, set the buffer size
  119 -- to that, so that we can read the entire file in one chunk.
  120 -- Otherwise, use a buffer size of at least 16KB.
  121 chooseGoodBuffering :: Handle -> IO ()
  122 -- entered 100 timeschooseGoodBuffering h = do
  123   bufMode <- hGetBuffering h
  124   case bufMode of
  125     BlockBuffering Nothing -> do
  126       d <- catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) ->
  127            if ioe_type e == InappropriateType
  128            then return 16384 -- faster than the 2KB default
  129            else throw e
  130       when (d > 0) . hSetBuffering h . BlockBuffering . Just . fromIntegral $ d
  131     _ -> return ()
  132 
  133 -- | Read a single line from a handle.
  134 hGetLine :: Handle -> IO Text
  135 #if __GLASGOW_HASKELL__ <= 610
  136 hGetLine = fmap decodeUtf8 . B.hGetLine
  137 #else
  138 -- entered oncehGetLine = hGetLineWith T.concat
  139 #endif
  140 
  141 -- | Write a string to a handle.
  142 hPutStr :: Handle -> Text -> IO ()
  143 #if __GLASGOW_HASKELL__ <= 610
  144 hPutStr h = B.hPutStr h . encodeUtf8
  145 #else
  146 -- This function is lifted almost verbatim from GHC.IO.Handle.Text.
  147 -- entered 4438 timeshPutStr h t = do
  148   (buffer_mode, nl) <- 
  149        wantWritableHandle "hPutStr" h $ \h_ -> do
  150                      bmode <- getSpareBuffer h_
  151                      return (bmode, haOutputNL h_)
  152   let str = stream t
  153   case buffer_mode of
  154      (NoBuffering, _)        -> hPutChars h str
  155      (LineBuffering, buf)    -> writeLines h nl buf str
  156      (BlockBuffering _, buf)
  157          | nl == CRLF        -> writeBlocksCRLF h buf str
  158          | otherwise         -> writeBlocksRaw h buf str
  159 
  160 hPutChars :: Handle -> Stream Char -> IO ()
  161 -- entered 1088 timeshPutChars h (Stream next0 s0 _len) = loop s0
  162   where
  163     loop !s = case next0 s of
  164                 Done       -> return ()
  165                 Skip s'    -> loop s'
  166                 Yield x s' -> hPutChar h x >> loop s'
  167 
  168 -- The following functions are largely lifted from GHC.IO.Handle.Text,
  169 -- but adapted to a coinductive stream of data instead of an inductive
  170 -- list.
  171 --
  172 -- We have several variations of more or less the same code for
  173 -- performance reasons.  Splitting the original buffered write
  174 -- function into line- and block-oriented versions gave us a 2.1x
  175 -- performance improvement.  Lifting out the raw/cooked newline
  176 -- handling gave a few more percent on top.
  177 
  178 writeLines :: Handle -> Newline -> Buffer CharBufElem -> Stream Char -> IO ()
  179 -- entered 1280 timeswriteLines h nl buf0 (Stream next0 s0 _len) = outer s0 buf0
  180  where
  181   outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
  182    where
  183     inner !s !n =
  184       case next0 s of
  185         Done -> commit n False{-no flush-} True{-release-} >> return ()
  186         Skip s' -> inner s' n
  187         Yield x s'
  188           | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
  189           | x == '\n'    -> do
  190                    n' <- if nl == CRLF
  191                          then do n1 <- writeCharBuf raw n '\r'
  192                                  writeCharBuf raw n1 '\n'
  193                          else writeCharBuf raw n x
  194                    commit n' True{-needs flush-} False >>= outer s'
  195           | otherwise    -> writeCharBuf raw n x >>= inner s'
  196     commit = commitBuffer h raw len
  197 
  198 writeBlocksCRLF :: Handle -> Buffer CharBufElem -> Stream Char -> IO ()
  199 -- entered 512 timeswriteBlocksCRLF h buf0 (Stream next0 s0 _len) = outer s0 buf0
  200  where
  201   outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
  202    where
  203     inner !s !n =
  204       case next0 s of
  205         Done -> commit n False{-no flush-} True{-release-} >> return ()
  206         Skip s' -> inner s' n
  207         Yield x s'
  208           | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
  209           | x == '\n'    -> do n1 <- writeCharBuf raw n '\r'
  210                                writeCharBuf raw n1 '\n' >>= inner s'
  211           | otherwise    -> writeCharBuf raw n x >>= inner s'
  212     commit = commitBuffer h raw len
  213 
  214 writeBlocksRaw :: Handle -> Buffer CharBufElem -> Stream Char -> IO ()
  215 -- entered 1558 timeswriteBlocksRaw h buf0 (Stream next0 s0 _len) = outer s0 buf0
  216  where
  217   outer s1 Buffer{bufRaw=raw, bufSize=len} = inner s1 (0::Int)
  218    where
  219     inner !s !n =
  220       case next0 s of
  221         Done -> commit n False{-no flush-} True{-release-} >> return ()
  222         Skip s' -> inner s' n
  223         Yield x s'
  224           | n + 1 >= len -> commit n True{-needs flush-} False >>= outer s
  225           | otherwise    -> writeCharBuf raw n x >>= inner s'
  226     commit = commitBuffer h raw len
  227 
  228 -- This function is completely lifted from GHC.IO.Handle.Text.
  229 getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer)
  230 -- entered 4438 timesgetSpareBuffer Handle__{haCharBuffer=ref, 
  231                         haBuffers=spare_ref,
  232                         haBufferMode=mode}
  233  = do
  234    case mode of
  235      NoBuffering -> return (mode, error "no buffer!")
  236      _ -> do
  237           bufs <- readIORef spare_ref
  238           buf  <- readIORef ref
  239           case bufs of
  240             BufferListCons b rest -> do
  241                 writeIORef spare_ref rest
  242                 return ( mode, emptyBuffer b (bufSize buf) WriteBuffer)
  243             BufferListNil -> do
  244                 new_buf <- newCharBuffer (bufSize buf) WriteBuffer
  245                 return (mode, new_buf)
  246 
  247 
  248 -- This function is completely lifted from GHC.IO.Handle.Text.
  249 commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool
  250              -> IO CharBuffer
  251 -- entered 4727 timescommitBuffer hdl !raw !sz !count flush release = 
  252   wantWritableHandle "commitAndReleaseBuffer" hdl $
  253      commitBuffer' raw sz count flush release
  254 {-# INLINE commitBuffer #-}
  255 #endif
  256 
  257 -- | Write a string to a handle, followed by a newline.
  258 hPutStrLn :: Handle -> Text -> IO ()
  259 -- entered 100 timeshPutStrLn h t = hPutStr h t >> hPutChar h '\n'
  260 
  261 -- | The 'interact' function takes a function of type @Text -> Text@
  262 -- as its argument. The entire input from the standard input device is
  263 -- passed to this function as its argument, and the resulting string
  264 -- is output on the standard output device.
  265 interact :: (Text -> Text) -> IO ()
  266 -- never enteredinteract f = putStr . f =<< getContents
  267 
  268 -- | Read all user input on 'stdin' as a single string.
  269 getContents :: IO Text
  270 -- never enteredgetContents = hGetContents stdin
  271 
  272 -- | Read a single line of user input from 'stdin'.
  273 getLine :: IO Text
  274 -- never enteredgetLine = hGetLine stdin
  275 
  276 -- | Write a string to 'stdout'.
  277 putStr :: Text -> IO ()
  278 -- never enteredputStr = hPutStr stdout
  279 
  280 -- | Write a string to 'stdout', followed by a newline.
  281 putStrLn :: Text -> IO ()
  282 -- never enteredputStrLn = hPutStrLn stdout
  283 
  284 -- $locale
  285 --
  286 -- /Note/: The behaviour of functions in this module depends on the
  287 -- version of GHC you are using.
  288 --
  289 -- Beginning with GHC 6.12, text I\/O is performed using the system or
  290 -- handle's current locale and line ending conventions.
  291 --
  292 -- Under GHC 6.10 and earlier, the system I\/O libraries /do not
  293 -- support/ locale-sensitive I\/O or line ending conversion.  On these
  294 -- versions of GHC, functions in this library all use UTF-8.  What
  295 -- does this mean in practice?
  296 --
  297 -- * All data that is read will be decoded as UTF-8.
  298 --
  299 -- * Before data is written, it is first encoded as UTF-8.
  300 --
  301 -- * On both reading and writing, the platform's native newline
  302 --   conversion is performed.
  303 --
  304 -- If you must use a non-UTF-8 locale on an older version of GHC, you
  305 -- will have to perform the transcoding yourself, e.g. as follows:
  306 --
  307 -- > import qualified Data.ByteString as B
  308 -- > import Data.Text (Text)
  309 -- > import Data.Text.Encoding (encodeUtf16)
  310 -- >
  311 -- > putStr_Utf16LE :: Text -> IO ()
  312 -- > putStr_Utf16LE t = B.putStr (encodeUtf16LE t)