1 {-# LANGUAGE BangPatterns, CPP, RecordWildCards #-}
    2 -- |
    3 -- Module      : Data.Text.IO.Internal
    4 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan,
    5 --               (c) 2009 Simon Marlow
    6 -- License     : BSD-style
    7 -- Maintainer  : bos@serpentine.com
    8 -- Stability   : experimental
    9 -- Portability : GHC
   10 --
   11 -- Low-level support for text I\/O.
   12 
   13 module Data.Text.IO.Internal
   14     (
   15 #if __GLASGOW_HASKELL__ >= 612
   16       hGetLineWith
   17     , readChunk
   18 #endif
   19     ) where
   20 
   21 #if __GLASGOW_HASKELL__ >= 612
   22 import Data.IORef (readIORef, writeIORef)
   23 import Data.Text (Text)
   24 import Data.Text.Fusion (unstream)
   25 import Data.Text.Fusion.Internal (Step(..), Stream(..))
   26 import Data.Text.Fusion.Size (exactSize, maxSize)
   27 import Data.Text.Unsafe (inlinePerformIO)
   28 import Foreign.Storable (peekElemOff)
   29 import GHC.IO.Buffer (Buffer(..), CharBuffer, RawCharBuffer, bufferAdjustL,
   30                       bufferElems, charSize, isEmptyBuffer, readCharBuf,
   31                       withRawBuffer, writeCharBuf)
   32 import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_)
   33 import GHC.IO.Handle.Types (Handle__(..), Newline(..))
   34 import System.IO (Handle)
   35 import System.IO.Error (isEOFError)
   36 import qualified Data.Text as T
   37 
   38 -- | Read a single line of input from a handle, constructing a list of
   39 -- decoded chunks as we go.  When we're done, transform them into the
   40 -- destination type.
   41 hGetLineWith :: ([Text] -> t) -> Handle -> IO t
   42 -- entered 200 timeshGetLineWith f h = wantReadableHandle_ "hGetLine" h go
   43   where
   44     go hh@Handle__{..} = readIORef haCharBuffer >>= fmap f . hGetLineLoop hh []
   45 
   46 hGetLineLoop :: Handle__ -> [Text] -> CharBuffer -> IO [Text]
   47 -- entered 400 timeshGetLineLoop hh@Handle__{..} ts buf@Buffer{ bufL=r0, bufR=w, bufRaw=raw0 } = do
   48   let findEOL raw r | r == w    = return (False, w)
   49                     | otherwise = do
   50         (c,r') <- readCharBuf raw r
   51         if c == '\n'
   52           then return (True, r)
   53           else findEOL raw r'
   54   (eol, off) <- findEOL raw0 r0
   55   (t,r') <- if haInputNL == CRLF
   56             then unpack_nl raw0 r0 off
   57             else do t <- unpack raw0 r0 off
   58                     return (t,off)
   59   if eol
   60     then do writeIORef haCharBuffer (bufferAdjustL (off+1) buf)
   61             return $ reverse (t:ts)
   62     else do
   63       let buf1 = bufferAdjustL r' buf
   64       maybe_buf <- maybeFillReadBuffer hh buf1
   65       case maybe_buf of
   66          -- Nothing indicates we caught an EOF, and we may have a
   67          -- partial line to return.
   68          Nothing -> do
   69               -- we reached EOF.  There might be a lone \r left
   70               -- in the buffer, so check for that and
   71               -- append it to the line if necessary.
   72               let pre | isEmptyBuffer buf1 = T.empty
   73                       | otherwise          = T.singleton '\r'
   74               writeIORef haCharBuffer buf1{ bufL=0, bufR=0 }
   75               let str = reverse . filter (not . T.null) $ pre:t:ts
   76               if null str
   77                 then ioe_EOF
   78                 else return str
   79          Just new_buf -> hGetLineLoop hh (t:ts) new_buf
   80 
   81 -- This function is lifted almost verbatim from GHC.IO.Handle.Text.
   82 maybeFillReadBuffer :: Handle__ -> CharBuffer -> IO (Maybe CharBuffer)
   83 -- entered 200 timesmaybeFillReadBuffer handle_ buf
   84   = catch (Just `fmap` getSomeCharacters handle_ buf) $ \e ->
   85       if isEOFError e 
   86       then return Nothing 
   87       else ioError e
   88 
   89 unpack :: RawCharBuffer -> Int -> Int -> IO Text
   90 -- entered 291 timesunpack !buf !r !w
   91  | charSize /= 4 = sizeError "unpack"
   92  | r >= w        = return T.empty
   93  | otherwise     = withRawBuffer buf go
   94  where
   95   go pbuf = return $! unstream (Stream next r (exactSize (w-r)))
   96    where
   97     next !i | i >= w    = Done
   98             | otherwise = Yield (ix i) (i+1)
   99     ix i = inlinePerformIO $ peekElemOff pbuf i
  100 
  101 unpack_nl :: RawCharBuffer -> Int -> Int -> IO (Text, Int)
  102 -- entered 317 timesunpack_nl !buf !r !w
  103  | charSize /= 4 = sizeError "unpack_nl"
  104  | r >= w        = return (T.empty, 0)
  105  | otherwise     = withRawBuffer buf $ go
  106  where
  107   go pbuf = do
  108     let !t = unstream (Stream next r (maxSize (w-r)))
  109         w' = w - 1
  110     return $ if ix w' == '\r'
  111              then (t,w')
  112              else (t,w)
  113    where
  114     next !i | i >= w = Done
  115             | c == '\r' = let i' = i + 1
  116                           in if i' < w
  117                              then if ix i' == '\n'
  118                                   then Yield '\n' (i+2)
  119                                   else Yield '\n' i'
  120                              else Done
  121             | otherwise = Yield c (i+1)
  122             where c = ix i
  123     ix i = inlinePerformIO $ peekElemOff pbuf i
  124 
  125 -- This function is completely lifted from GHC.IO.Handle.Text.
  126 getSomeCharacters :: Handle__ -> CharBuffer -> IO CharBuffer
  127 -- entered 608 timesgetSomeCharacters handle_@Handle__{..} buf@Buffer{..} =
  128   case bufferElems buf of
  129     -- buffer empty: read some more
  130     0 -> {-# SCC "readTextDevice" #-} readTextDevice handle_ buf
  131 
  132     -- if the buffer has a single '\r' in it and we're doing newline
  133     -- translation: read some more
  134     1 | haInputNL == CRLF -> do
  135       (c,_) <- readCharBuf bufRaw bufL
  136       if c == '\r'
  137          then do -- shuffle the '\r' to the beginning.  This is only safe
  138                  -- if we're about to call readTextDevice, otherwise it
  139                  -- would mess up flushCharBuffer.
  140                  -- See [note Buffer Flushing], GHC.IO.Handle.Types
  141                  _ <- writeCharBuf bufRaw 0 '\r'
  142                  let buf' = buf{ bufL=0, bufR=1 }
  143                  readTextDevice handle_ buf'
  144          else do
  145                  return buf
  146 
  147     -- buffer has some chars in it already: just return it
  148     _otherwise -> {-# SCC "otherwise" #-} return buf
  149 
  150 -- | Read a single chunk of strict text from a buffer. Used by both
  151 -- the strict and lazy implementations of hGetContents.
  152 readChunk :: Handle__ -> CharBuffer -> IO Text
  153 -- entered 408 timesreadChunk hh@Handle__{..} buf = do
  154   buf'@Buffer{..} <- getSomeCharacters hh buf
  155   (t,r) <- if haInputNL == CRLF
  156            then unpack_nl bufRaw bufL bufR
  157            else do t <- unpack bufRaw bufL bufR
  158                    return (t,bufR)
  159   writeIORef haCharBuffer (bufferAdjustL r buf')
  160   return t
  161 
  162 sizeError :: String -> a
  163 -- never enteredsizeError loc = error $ "Data.Text.IO." ++ loc ++ ": bad internal buffer size"
  164 #endif