Skip to content

Commit b815dbe

Browse files
committed
Share empty Text values
Try to use the same heap object to represent all empty Text values. There are already attempts to enforce something like this through the `text` smart constructor, and in various functions by special casing the empty case. This patch expands on these attempts and adds some tests to ensure that empty Text values produced by this library are represented by the same heap object. Despite these efforts, we cannot guarantee that this will be the case in all situations and users of the library shouldn't rely on this behaviour for the correctness of their code. Resolves #492.
1 parent 0a72ea0 commit b815dbe

File tree

11 files changed

+494
-218
lines changed

11 files changed

+494
-218
lines changed

src/Data/Text.hs

Lines changed: 58 additions & 193 deletions
Large diffs are not rendered by default.

src/Data/Text/Foreign.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ newtype I8 = I8 Int
7171
fromPtr :: Ptr Word8 -- ^ source array
7272
-> I8 -- ^ length of source array (in 'Word8' units)
7373
-> IO Text
74+
fromPtr _ (I8 0) = pure empty
7475
fromPtr ptr (I8 len) = unsafeSTToIO $ do
7576
dst <- A.new len
7677
A.copyFromPointer dst 0 ptr len

src/Data/Text/Internal.hs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ module Data.Text.Internal
3737
, safe
3838
-- * Code that must be here for accessibility
3939
, empty
40-
, empty_
4140
, append
4241
-- * Utilities
4342
, firstf
@@ -94,12 +93,7 @@ text_ arr off len =
9493
-- | /O(1)/ The empty 'Text'.
9594
empty :: Text
9695
empty = Text A.empty 0 0
97-
{-# INLINE [1] empty #-}
98-
99-
-- | A non-inlined version of 'empty'.
100-
empty_ :: Text
101-
empty_ = Text A.empty 0 0
102-
{-# NOINLINE empty_ #-}
96+
{-# NOINLINE empty #-}
10397

10498
-- | /O(n)/ Appends one 'Text' to the other by copying both of them
10599
-- into a new 'Text'.
@@ -121,6 +115,7 @@ append a@(Text arr1 off1 len1) b@(Text arr2 off2 len2)
121115

122116
-- | Construct a 'Text' without invisibly pinning its byte array in
123117
-- memory if its length has dwindled to zero.
118+
-- It ensures that empty 'Text' values are shared.
124119
text ::
125120
#if defined(ASSERTS)
126121
HasCallStack =>
@@ -131,7 +126,7 @@ text ::
131126
-> Text
132127
text arr off len | len == 0 = empty
133128
| otherwise = text_ arr off len
134-
{-# INLINE text #-}
129+
{-# INLINE [0] text #-}
135130

136131
textP :: A.Array -> Int -> Int -> Text
137132
{-# DEPRECATED textP "Use text instead" #-}
@@ -251,6 +246,7 @@ int64ToInt32 = fromIntegral
251246
-- >>> Data.Text.unpack (pack "\55555")
252247
-- "\65533"
253248
pack :: String -> Text
249+
pack [] = empty
254250
pack xs = runST $ do
255251
-- It's tempting to allocate a buffer of 4 * length xs bytes,
256252
-- but not only it's wasteful for predominantly ASCII arguments,

src/Data/Text/Internal/Lazy.hs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import Data.Typeable (Typeable)
4747
import Foreign.Storable (sizeOf)
4848
import qualified Data.Text.Array as A
4949
import qualified Data.Text.Internal as T
50+
import qualified Data.Text as T
5051

5152
data Text = Empty
5253
| Chunk {-# UNPACK #-} !T.Text Text
@@ -86,9 +87,16 @@ showStructure (Chunk t ts) =
8687

8788
-- | Smart constructor for 'Chunk'. Guarantees the data type invariant.
8889
chunk :: T.Text -> Text -> Text
89-
{-# INLINE chunk #-}
90-
chunk t@(T.Text _ _ len) ts | len == 0 = ts
91-
| otherwise = Chunk t ts
90+
{-# INLINE [0] chunk #-}
91+
chunk t ts | T.null t = ts
92+
| otherwise = Chunk t ts
93+
94+
{-# RULES
95+
"TEXT chunk/text" forall arr off len.
96+
chunk (T.text arr off len) = chunk (T.Text arr off len)
97+
"TEXT chunk/empty" forall ts.
98+
chunk T.empty ts = ts
99+
#-}
92100

93101
-- | Smart constructor for 'Empty'.
94102
empty :: Text

src/Data/Text/Internal/Reverse.hs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
{-# OPTIONS_HADDOCK not-home #-}
1010

1111
-- | Implements 'reverse', using efficient C routines by default.
12-
module Data.Text.Internal.Reverse (reverse) where
12+
module Data.Text.Internal.Reverse (reverse, reverseNonEmpty) where
1313

1414
#if !defined(PURE_HASKELL)
1515
import GHC.Exts as Exts
@@ -23,7 +23,7 @@ import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
2323
import GHC.Stack (HasCallStack)
2424
#endif
2525
import Prelude hiding (reverse)
26-
import Data.Text.Internal (Text(..))
26+
import Data.Text.Internal (Text(..), empty)
2727
import Control.Monad.ST (runST)
2828
import qualified Data.Text.Array as A
2929

@@ -39,8 +39,16 @@ reverse ::
3939
HasCallStack =>
4040
#endif
4141
Text -> Text
42+
reverse (Text _ _ 0) = empty
43+
reverse t = reverseNonEmpty t
44+
{-# INLINE reverse #-}
45+
46+
-- | /O(n)/ Reverse the characters of a string.
47+
-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty.
48+
reverseNonEmpty ::
49+
Text -> Text
4250
#if defined(PURE_HASKELL)
43-
reverse (Text src off len) = runST $ do
51+
reverseNonEmtpy (Text src off len) = runST $ do
4452
dest <- A.new len
4553
_ <- reversePoints src off dest len
4654
result <- A.unsafeFreeze dest
@@ -80,13 +88,13 @@ reversePoints src xx dest yy = go xx yy where
8088
A.copyI pLen dest yNext src x
8189
go (x + pLen) yNext
8290
#else
83-
reverse (Text (A.ByteArray ba) off len) = runST $ do
91+
reverseNonEmpty (Text (A.ByteArray ba) off len) = runST $ do
8492
marr@(A.MutableByteArray mba) <- A.new len
8593
unsafeIOToST $ c_reverse mba ba (fromIntegral off) (fromIntegral len)
8694
brr <- A.unsafeFreeze marr
8795
return $ Text brr 0 len
8896
#endif
89-
{-# INLINE reverse #-}
97+
{-# INLINE reverseNonEmpty #-}
9098

9199
#if !defined(PURE_HASKELL)
92100
-- | The input buffer (src :: ByteArray#, off :: CSize, len :: CSize)

0 commit comments

Comments
 (0)