Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 1 addition & 13 deletions src/Data/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ import qualified Data.Text.Internal.Fusion.Common as S
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
import Data.Text.Internal.Fusion (stream, reverseStream, unstream)
import Data.Text.Internal.Private (span_)
import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text, append)
import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text, append, pack)
import Data.Text.Internal.Unsafe.Char (unsafeWrite, unsafeChr8)
import Data.Text.Show (singleton, unpack, unpackCString#, unpackCStringAscii#)
import qualified Prelude as P
Expand Down Expand Up @@ -452,18 +452,6 @@ compareText (Text arrA offA lenA) (Text arrB offB lenB) =
-- lexicographic ordering of UTF-8 encoded strings matches lexicographic ordering
-- of underlying bytearrays, no decoding is needed.

-- -----------------------------------------------------------------------------
-- * Conversion to/from 'Text'

-- | /O(n)/ Convert a 'String' into a 'Text'.
-- Performs replacement on invalid scalar values, so @'unpack' . 'pack'@ is not 'id':
--
-- >>> unpack (pack "\55555")
-- "\65533"
pack :: String -> Text
pack = unstream . S.map safe . S.streamList
{-# INLINE [1] pack #-}

-- -----------------------------------------------------------------------------
-- * Basic functions

Expand Down
28 changes: 26 additions & 2 deletions src/Data/Text/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,18 @@ module Data.Text.Internal
, mul64
-- * Debugging
, showText
-- * Conversions
, pack
) where

#if defined(ASSERTS)
import Control.Exception (assert)
import GHC.Stack (HasCallStack)
#endif
import Control.Monad.ST (ST)
import Control.Monad.ST (ST, runST)
import Data.Bits
import Data.Int (Int32, Int64)
import Data.Text.Internal.Unsafe.Char (ord)
import Data.Text.Internal.Unsafe.Char (ord, unsafeWrite)
import Data.Typeable (Typeable)
import qualified Data.Text.Array as A

Expand Down Expand Up @@ -231,3 +233,25 @@ int64ToInt32 = fromIntegral
--
-- * Offset and length must point to a valid UTF-8 sequence of bytes.
-- Violation of this may cause memory access violation and divergence.

-- -----------------------------------------------------------------------------
-- * Conversion to/from 'Text'

-- | /O(n)/ Convert a 'String' into a 'Text'.
-- Performs replacement on invalid scalar values, so @'Data.Text.unpack' . 'pack'@ is not 'id':
--
-- >>> Data.Text.unpack (pack "\55555")
-- "\65533"
pack :: String -> Text
pack xs = runST $ do
-- Each 'Char' takes up to 4 bytes
marr <- A.new (length xs `shiftL` 2)
let go off [] = pure off
go off (c : cs) = do
d <- unsafeWrite marr off (safe c)
go (off + d) cs
len <- go 0 xs
arr <- A.unsafeFreeze marr
return (Text arr 0 len)
{-# NOINLINE [0] pack #-}
-- TODO Do not calculate length xs upfront
49 changes: 18 additions & 31 deletions src/Data/Text/Show.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ module Data.Text.Show
) where

import Control.Monad.ST (ST, runST)
import Data.Text.Internal (Text(..), empty_, safe)
import Data.Text.Internal (Text(..), empty_, safe, pack)
import Data.Text.Internal.Encoding.Utf8 (utf8Length)
import Data.Text.Internal.Fusion (stream, unstream)
import Data.Text.Internal.Fusion (stream)
import Data.Text.Internal.Unsafe.Char (unsafeWrite)
import GHC.Exts (Ptr(..), Int(..), Addr#, indexWord8OffAddr#)
import GHC.Word (Word8(..))
Expand Down Expand Up @@ -87,7 +87,11 @@ unpackCString# addr# = runST $ do
A.shrinkM marr actualLen
arr <- A.unsafeFreeze marr
return $ Text arr 0 actualLen
{-# INLINE unpackCString# #-}

-- When a module contains many literal strings, 'unpackCString#' can easily
-- bloat generated code to insane size. There is also very little to gain
-- from inlining. Thus explicit NOINLINE is desired.
{-# NOINLINE unpackCString# #-}

-- | /O(n)/ Convert a null-terminated ASCII string to a 'Text'.
-- Counterpart to 'GHC.unpackCString#'.
Expand All @@ -102,7 +106,7 @@ unpackCStringAscii# addr# = Text ba 0 l
marr <- A.new l
A.copyFromPointer marr 0 (Ptr addr#) l
A.unsafeFreeze marr
{-# INLINE unpackCStringAscii# #-}
{-# NOINLINE unpackCStringAscii# #-}

addrLen :: Addr# -> Int
#if MIN_VERSION_ghc_prim(0,7,0)
Expand All @@ -113,21 +117,17 @@ addrLen addr# = fromIntegral (inlinePerformIO (c_strlen (Ptr addr#)))
foreign import capi unsafe "string.h strlen" c_strlen :: CString -> IO CSize
#endif

{-# RULES "TEXT literal" [1] forall a.
unstream (S.map safe (S.streamList (GHC.unpackCString# a)))
= unpackCStringAscii# a #-}
{-# RULES "TEXT literal" forall a.
pack (GHC.unpackCString# a) = unpackCStringAscii# a #-}

{-# RULES "TEXT literal UTF8" [1] forall a.
unstream (S.map safe (S.streamList (GHC.unpackCStringUtf8# a)))
= unpackCString# a #-}
{-# RULES "TEXT literal UTF8" forall a.
pack (GHC.unpackCStringUtf8# a) = unpackCString# a #-}

{-# RULES "TEXT empty literal" [1]
unstream (S.map safe (S.streamList []))
= empty_ #-}
{-# RULES "TEXT empty literal"
pack [] = empty_ #-}

{-# RULES "TEXT singleton literal" [1] forall a.
unstream (S.map safe (S.streamList [a]))
= singleton_ a #-}
{-# RULES "TEXT singleton literal" forall a.
pack [a] = singleton a #-}

-- | /O(1)/ Convert a character into a Text.
-- Performs replacement on invalid scalar values.
Expand All @@ -136,24 +136,11 @@ singleton ::
HasCallStack =>
#endif
Char -> Text
singleton = unstream . S.singleton . safe
{-# INLINE [1] singleton #-}

{-# RULES "TEXT singleton" forall a.
unstream (S.singleton (safe a))
= singleton_ a #-}

-- This is intended to reduce inlining bloat.
singleton_ ::
#if defined(ASSERTS)
HasCallStack =>
#endif
Char -> Text
singleton_ c = Text (A.run x) 0 len
singleton c = Text (A.run x) 0 len
where x :: ST s (A.MArray s)
x = do arr <- A.new len
_ <- unsafeWrite arr 0 d
return arr
len = utf8Length d
d = safe c
{-# NOINLINE singleton_ #-}
{-# NOINLINE singleton #-}
3 changes: 3 additions & 0 deletions tests/Tests/Properties/LowLevel.hs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,10 @@ testLowLevel =
[ (`hasNoTypes` [''Char, ''[]])
, (`doesNotUseAnyOf` ['T.pack, 'S.unstream, 'T.map, 'safe, 'S.streamList])
, (`doesNotUseAnyOf` ['GHC.unpackCString#, 'GHC.unpackCStringUtf8#])
#if MIN_VERSION_base(4,10,0)
-- skip this test for GHC 8.0
, (`doesNotUseAnyOf` ['T.unpackCString#, 'T.unpackCStringAscii#])
#endif
]
't_literal_foo)
#endif
Expand Down