1 {-# LANGUAGE CPP #-}
    2 {-# OPTIONS_GHC -XDeriveDataTypeable #-}
    3 -- |
    4 -- Module      : Data.ByteString.Lazy.Internal
    5 -- License     : BSD-style
    6 -- Maintainer  : dons@galois.com, duncan@haskell.org
    7 -- Stability   : experimental
    8 -- Portability : portable
    9 -- 
   10 -- A module containing semi-public 'ByteString' internals. This exposes
   11 -- the 'ByteString' representation and low level construction functions.
   12 -- Modules which extend the 'ByteString' system will need to use this module
   13 -- while ideally most users will be able to make do with the public interface
   14 -- modules.
   15 --
   16 module Data.ByteString.Lazy.Internal (
   17 
   18         -- * The lazy @ByteString@ type and representation
   19         ByteString(..),     -- instances: Eq, Ord, Show, Read, Data, Typeable
   20         chunk,
   21         foldrChunks,
   22         foldlChunks,
   23 
   24         -- * Data type invariant and abstraction function
   25         invariant,
   26         checkInvariant,
   27 
   28         -- * Chunk allocation sizes
   29         defaultChunkSize,
   30         smallChunkSize,
   31         chunkOverhead
   32 
   33   ) where
   34 
   35 import qualified Data.ByteString.Internal as S
   36 
   37 import Foreign.Storable (sizeOf)
   38 
   39 #if defined(__GLASGOW_HASKELL__)
   40 import Data.Generics            (Data(..), Typeable(..))
   41 #endif
   42 
   43 -- | A space-efficient representation of a Word8 vector, supporting many
   44 -- efficient operations.  A 'ByteString' contains 8-bit characters only.
   45 --
   46 -- Instances of Eq, Ord, Read, Show, Data, Typeable
   47 --
   48 data ByteString = Empty | Chunk {-# UNPACK #-} !S.ByteString ByteString
   49     deriving (Show, Read
   50 #if defined(__GLASGOW_HASKELL__)
   51                         ,Data, Typeable
   52 #endif
   53              )
   54 
   55 ------------------------------------------------------------------------
   56 
   57 -- | The data type invariant:
   58 -- Every ByteString is either 'Empty' or consists of non-null 'S.ByteString's.
   59 -- All functions must preserve this, and the QC properties must check this.
   60 --
   61 invariant :: ByteString -> Bool
   62 invariant Empty                     = True
   63 invariant (Chunk (S.PS _ _ len) cs) = len > 0 && invariant cs
   64 
   65 -- | In a form that checks the invariant lazily.
   66 checkInvariant :: ByteString -> ByteString
   67 checkInvariant Empty = Empty
   68 checkInvariant (Chunk c@(S.PS _ _ len) cs)
   69     | len > 0   = Chunk c (checkInvariant cs)
   70     | otherwise = error $ "Data.ByteString.Lazy: invariant violation:"
   71                ++ show (Chunk c cs)
   72 
   73 ------------------------------------------------------------------------
   74 
   75 -- | Smart constructor for 'Chunk'. Guarantees the data type invariant.
   76 chunk :: S.ByteString -> ByteString -> ByteString
   77 chunk c@(S.PS _ _ len) cs | len == 0  = cs
   78                           | otherwise = Chunk c cs
   79 {-# INLINE chunk #-}
   80 
   81 -- | Consume the chunks of a lazy ByteString with a natural right fold.
   82 foldrChunks :: (S.ByteString -> a -> a) -> a -> ByteString -> a
   83 foldrChunks f z = go
   84   where go Empty        = z
   85         go (Chunk c cs) = f c (go cs)
   86 {-# INLINE foldrChunks #-}
   87 
   88 -- | Consume the chunks of a lazy ByteString with a strict, tail-recursive,
   89 -- accumulating left fold.
   90 foldlChunks :: (a -> S.ByteString -> a) -> a -> ByteString -> a
   91 foldlChunks f z = go z
   92   where go a _ | a `seq` False = undefined
   93         go a Empty        = a
   94         go a (Chunk c cs) = go (f a c) cs
   95 {-# INLINE foldlChunks #-}
   96 
   97 ------------------------------------------------------------------------
   98 
   99 -- The representation uses lists of packed chunks. When we have to convert from
  100 -- a lazy list to the chunked representation, then by default we use this
  101 -- chunk size. Some functions give you more control over the chunk size.
  102 --
  103 -- Measurements here:
  104 --  http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png
  105 --
  106 -- indicate that a value around 0.5 to 1 x your L2 cache is best.
  107 -- The following value assumes people have something greater than 128k,
  108 -- and need to share the cache with other programs.
  109 
  110 -- | Currently set to 32k, less the memory management overhead
  111 defaultChunkSize :: Int
  112 defaultChunkSize = 32 * k - chunkOverhead
  113    where k = 1024
  114 
  115 -- | Currently set to 4k, less the memory management overhead
  116 smallChunkSize :: Int
  117 smallChunkSize = 4 * k - chunkOverhead
  118    where k = 1024
  119 
  120 -- | The memory management overhead. Currently this is tuned for GHC only.
  121 chunkOverhead :: Int
  122 chunkOverhead = 2 * sizeOf (undefined :: Int)