1 {-# LANGUAGE CPP #-} 2 {-# OPTIONS_GHC -XDeriveDataTypeable #-} 3 -- | 4 -- Module : Data.ByteString.Lazy.Internal 5 -- License : BSD-style 6 -- Maintainer : dons@galois.com, duncan@haskell.org 7 -- Stability : experimental 8 -- Portability : portable 9 -- 10 -- A module containing semi-public 'ByteString' internals. This exposes 11 -- the 'ByteString' representation and low level construction functions. 12 -- Modules which extend the 'ByteString' system will need to use this module 13 -- while ideally most users will be able to make do with the public interface 14 -- modules. 15 -- 16 module Data.ByteString.Lazy.Internal ( 17 18 -- * The lazy @ByteString@ type and representation 19 ByteString(..), -- instances: Eq, Ord, Show, Read, Data, Typeable 20 chunk, 21 foldrChunks, 22 foldlChunks, 23 24 -- * Data type invariant and abstraction function 25 invariant, 26 checkInvariant, 27 28 -- * Chunk allocation sizes 29 defaultChunkSize, 30 smallChunkSize, 31 chunkOverhead 32 33 ) where 34 35 import qualified Data.ByteString.Internal as S 36 37 import Foreign.Storable (sizeOf) 38 39 #if defined(__GLASGOW_HASKELL__) 40 import Data.Generics (Data(..), Typeable(..)) 41 #endif 42 43 -- | A space-efficient representation of a Word8 vector, supporting many 44 -- efficient operations. A 'ByteString' contains 8-bit characters only. 45 -- 46 -- Instances of Eq, Ord, Read, Show, Data, Typeable 47 -- 48 data ByteString = Empty | Chunk {-# UNPACK #-} !S.ByteString ByteString 49 deriving (Show, Read 50 #if defined(__GLASGOW_HASKELL__) 51 ,Data, Typeable 52 #endif 53 ) 54 55 ------------------------------------------------------------------------ 56 57 -- | The data type invariant: 58 -- Every ByteString is either 'Empty' or consists of non-null 'S.ByteString's. 59 -- All functions must preserve this, and the QC properties must check this. 60 -- 61 invariant :: ByteString -> Bool 62 invariant Empty = True 63 invariant (Chunk (S.PS _ _ len) cs) = len > 0 && invariant cs 64 65 -- | In a form that checks the invariant lazily. 66 checkInvariant :: ByteString -> ByteString 67 checkInvariant Empty = Empty 68 checkInvariant (Chunk c@(S.PS _ _ len) cs) 69 | len > 0 = Chunk c (checkInvariant cs) 70 | otherwise = error $ "Data.ByteString.Lazy: invariant violation:" 71 ++ show (Chunk c cs) 72 73 ------------------------------------------------------------------------ 74 75 -- | Smart constructor for 'Chunk'. Guarantees the data type invariant. 76 chunk :: S.ByteString -> ByteString -> ByteString 77 chunk c@(S.PS _ _ len) cs | len == 0 = cs 78 | otherwise = Chunk c cs 79 {-# INLINE chunk #-} 80 81 -- | Consume the chunks of a lazy ByteString with a natural right fold. 82 foldrChunks :: (S.ByteString -> a -> a) -> a -> ByteString -> a 83 foldrChunks f z = go 84 where go Empty = z 85 go (Chunk c cs) = f c (go cs) 86 {-# INLINE foldrChunks #-} 87 88 -- | Consume the chunks of a lazy ByteString with a strict, tail-recursive, 89 -- accumulating left fold. 90 foldlChunks :: (a -> S.ByteString -> a) -> a -> ByteString -> a 91 foldlChunks f z = go z 92 where go a _ | a `seq` False = undefined 93 go a Empty = a 94 go a (Chunk c cs) = go (f a c) cs 95 {-# INLINE foldlChunks #-} 96 97 ------------------------------------------------------------------------ 98 99 -- The representation uses lists of packed chunks. When we have to convert from 100 -- a lazy list to the chunked representation, then by default we use this 101 -- chunk size. Some functions give you more control over the chunk size. 102 -- 103 -- Measurements here: 104 -- http://www.cse.unsw.edu.au/~dons/tmp/chunksize_v_cache.png 105 -- 106 -- indicate that a value around 0.5 to 1 x your L2 cache is best. 107 -- The following value assumes people have something greater than 128k, 108 -- and need to share the cache with other programs. 109 110 -- | Currently set to 32k, less the memory management overhead 111 defaultChunkSize :: Int 112 defaultChunkSize = 32 * k - chunkOverhead 113 where k = 1024 114 115 -- | Currently set to 4k, less the memory management overhead 116 smallChunkSize :: Int 117 smallChunkSize = 4 * k - chunkOverhead 118 where k = 1024 119 120 -- | The memory management overhead. Currently this is tuned for GHC only. 121 chunkOverhead :: Int 122 chunkOverhead = 2 * sizeOf (undefined :: Int)