{-# LANGUAGE TypeApplications #-}

module DataFrame.IO.JSON (
    readJSON,
    readJSONEither,
) where

import Control.Monad (forM)
import Data.Aeson
import qualified Data.Aeson.Key as K
import qualified Data.Aeson.KeyMap as KM
import qualified Data.ByteString.Lazy as LBS
import Data.Maybe (catMaybes)
import Data.Scientific (toRealFloat)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V

import qualified DataFrame.Internal.Column as D
import qualified DataFrame.Internal.DataFrame as D
import qualified DataFrame.Operations.Core as D

readJSONEither :: LBS.ByteString -> Either String D.DataFrame
readJSONEither :: ByteString -> Either [Char] DataFrame
readJSONEither ByteString
bs = do
    Value
v <- [Char] -> Maybe Value -> Either [Char] Value
forall e a. e -> Maybe a -> Either e a
note [Char]
"Could not decode JSON" (forall a. FromJSON a => ByteString -> Maybe a
decode @Value ByteString
bs)
    Vector Object
rows <- Value -> Either [Char] (Vector Object)
toArrayOfObjects Value
v
    let cols :: [Text]
        cols :: [Text]
cols =
            [Text] -> [Text]
forall a. Ord a => [a] -> [a]
uniq
                ([Text] -> [Text])
-> (Vector Object -> [Text]) -> Vector Object -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Object -> [Text]) -> [Object] -> [Text]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((Key -> Text) -> [Key] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Key -> Text
K.toText ([Key] -> [Text]) -> (Object -> [Key]) -> Object -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Object -> [Key]
forall v. KeyMap v -> [Key]
KM.keys)
                ([Object] -> [Text])
-> (Vector Object -> [Object]) -> Vector Object -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Object -> [Object]
forall a. Vector a -> [a]
V.toList
                (Vector Object -> [Text]) -> Vector Object -> [Text]
forall a b. (a -> b) -> a -> b
$ Vector Object
rows

    [(Text, Column)]
columns <- [Text]
-> (Text -> Either [Char] (Text, Column))
-> Either [Char] [(Text, Column)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Text]
cols ((Text -> Either [Char] (Text, Column))
 -> Either [Char] [(Text, Column)])
-> (Text -> Either [Char] (Text, Column))
-> Either [Char] [(Text, Column)]
forall a b. (a -> b) -> a -> b
$ \Text
c -> do
        let col :: Column
col = Vector Object -> Text -> Column
buildColumn Vector Object
rows Text
c
        (Text, Column) -> Either [Char] (Text, Column)
forall a. a -> Either [Char] a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text
c, Column
col)

    DataFrame -> Either [Char] DataFrame
forall a. a -> Either [Char] a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (DataFrame -> Either [Char] DataFrame)
-> DataFrame -> Either [Char] DataFrame
forall a b. (a -> b) -> a -> b
$ [(Text, Column)] -> DataFrame
D.fromNamedColumns [(Text, Column)]
columns

readJSON :: FilePath -> IO D.DataFrame
readJSON :: [Char] -> IO DataFrame
readJSON [Char]
path = do
    ByteString
contents <- [Char] -> IO ByteString
LBS.readFile [Char]
path
    case ByteString -> Either [Char] DataFrame
readJSONEither ByteString
contents of
        Left [Char]
err -> [Char] -> IO DataFrame
forall a. [Char] -> IO a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> IO DataFrame) -> [Char] -> IO DataFrame
forall a b. (a -> b) -> a -> b
$ [Char]
"readJSON: " [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> [Char]
err
        Right DataFrame
df -> DataFrame -> IO DataFrame
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure DataFrame
df

toArrayOfObjects :: Value -> Either String (V.Vector Object)
toArrayOfObjects :: Value -> Either [Char] (Vector Object)
toArrayOfObjects (Array Vector Value
xs)
    | Vector Value -> Bool
forall a. Vector a -> Bool
V.null Vector Value
xs = [Char] -> Either [Char] (Vector Object)
forall a b. a -> Either a b
Left [Char]
"Top-level JSON array is empty"
    | Bool
otherwise = (Value -> Either [Char] Object)
-> Vector Value -> Either [Char] (Vector Object)
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
forall (f :: * -> *) a b.
Applicative f =>
(a -> f b) -> Vector a -> f (Vector b)
traverse Value -> Either [Char] Object
asObject Vector Value
xs
toArrayOfObjects Value
_ =
    [Char] -> Either [Char] (Vector Object)
forall a b. a -> Either a b
Left [Char]
"Top-level JSON value must be a JSON array of objects"

asObject :: Value -> Either String Object
asObject :: Value -> Either [Char] Object
asObject (Object Object
o) = Object -> Either [Char] Object
forall a b. b -> Either a b
Right Object
o
asObject Value
_ = [Char] -> Either [Char] Object
forall a b. a -> Either a b
Left [Char]
"Expected each element of the array to be an object"

uniq :: (Ord a) => [a] -> [a]
uniq :: forall a. Ord a => [a] -> [a]
uniq = [a] -> [a] -> [a]
forall {a}. Eq a => [a] -> [a] -> [a]
go [a]
forall a. Monoid a => a
mempty
  where
    go :: [a] -> [a] -> [a]
go [a]
_ [] = []
    go [a]
seen (a
x : [a]
xs)
        | a
x a -> [a] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [a]
seen = [a] -> [a] -> [a]
go [a]
seen [a]
xs
        | Bool
otherwise = a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a] -> [a] -> [a]
go (a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
seen) [a]
xs

note :: e -> Maybe a -> Either e a
note :: forall e a. e -> Maybe a -> Either e a
note e
e = Either e a -> (a -> Either e a) -> Maybe a -> Either e a
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (e -> Either e a
forall a b. a -> Either a b
Left e
e) a -> Either e a
forall a b. b -> Either a b
Right

data ColType
    = CTString
    | CTNumber
    | CTBool
    | CTArray
    | CTMixed

buildColumn :: V.Vector Object -> Text -> D.Column
buildColumn :: Vector Object -> Text -> Column
buildColumn Vector Object
rows Text
colName =
    let key :: Key
key = Text -> Key
K.fromText Text
colName
        values :: V.Vector (Maybe Value)
        values :: Vector (Maybe Value)
values = (Object -> Maybe Value) -> Vector Object -> Vector (Maybe Value)
forall a b. (a -> b) -> Vector a -> Vector b
V.map (Key -> Object -> Maybe Value
forall v. Key -> KeyMap v -> Maybe v
KM.lookup Key
key) Vector Object
rows
        colType :: ColType
colType = Vector (Maybe Value) -> ColType
detectColType Vector (Maybe Value)
values
     in case ColType
colType of
            ColType
CTString ->
                Vector (Maybe Text) -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
Vector a -> Column
D.fromVector ((Maybe Value -> Maybe Text)
-> Vector (Maybe Value) -> Vector (Maybe Text)
forall a b. (a -> b) -> Vector a -> Vector b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Value -> Text) -> Maybe Value -> Maybe Text
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Value -> Text
asText) Vector (Maybe Value)
values)
            ColType
CTNumber ->
                Vector (Maybe Double) -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
Vector a -> Column
D.fromVector ((Maybe Value -> Maybe Double)
-> Vector (Maybe Value) -> Vector (Maybe Double)
forall a b. (a -> b) -> Vector a -> Vector b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Value -> Double) -> Maybe Value -> Maybe Double
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Value -> Double
asDouble) Vector (Maybe Value)
values)
            ColType
CTBool ->
                Vector (Maybe Bool) -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
Vector a -> Column
D.fromVector ((Maybe Value -> Maybe Bool)
-> Vector (Maybe Value) -> Vector (Maybe Bool)
forall a b. (a -> b) -> Vector a -> Vector b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Value -> Bool) -> Maybe Value -> Maybe Bool
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Value -> Bool
asBool) Vector (Maybe Value)
values)
            ColType
CTArray ->
                Vector (Maybe (Vector Value)) -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
Vector a -> Column
D.fromVector ((Maybe Value -> Maybe (Vector Value))
-> Vector (Maybe Value) -> Vector (Maybe (Vector Value))
forall a b. (a -> b) -> Vector a -> Vector b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Value -> Vector Value) -> Maybe Value -> Maybe (Vector Value)
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Value -> Vector Value
asArray) Vector (Maybe Value)
values)
            ColType
CTMixed ->
                Vector (Maybe Value) -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
Vector a -> Column
D.fromVector Vector (Maybe Value)
values

detectColType :: V.Vector (Maybe Value) -> ColType
detectColType :: Vector (Maybe Value) -> ColType
detectColType Vector (Maybe Value)
vals =
    case [Value]
nonMissing of
        [] -> ColType
CTMixed
        [Value]
vs
            | (Value -> Bool) -> [Value] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Value -> Bool
isString [Value]
vs -> ColType
CTString
            | (Value -> Bool) -> [Value] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Value -> Bool
isNumber [Value]
vs -> ColType
CTNumber
            | (Value -> Bool) -> [Value] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Value -> Bool
isBool [Value]
vs -> ColType
CTBool
            | (Value -> Bool) -> [Value] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Value -> Bool
isArray [Value]
vs -> ColType
CTArray
            | Bool
otherwise -> ColType
CTMixed
  where
    nonMissing :: [Value]
nonMissing = [Maybe Value] -> [Value]
forall a. [Maybe a] -> [a]
catMaybes (Vector (Maybe Value) -> [Maybe Value]
forall a. Vector a -> [a]
V.toList Vector (Maybe Value)
vals)

    isString :: Value -> Bool
isString (String Text
_) = Bool
True
    isString Value
_ = Bool
False

    isNumber :: Value -> Bool
isNumber (Number Scientific
_) = Bool
True
    isNumber Value
_ = Bool
False

    isBool :: Value -> Bool
isBool (Bool Bool
_) = Bool
True
    isBool Value
_ = Bool
False

    isArray :: Value -> Bool
isArray (Array Vector Value
_) = Bool
True
    isArray Value
_ = Bool
False

asText :: Value -> Text
asText :: Value -> Text
asText (String Text
s) = Text
s
asText Value
v = [Char] -> Text
T.pack (Value -> [Char]
forall a. Show a => a -> [Char]
show Value
v)

asDouble :: Value -> Double
asDouble :: Value -> Double
asDouble (Number Scientific
s) = forall a. RealFloat a => Scientific -> a
toRealFloat @Double Scientific
s
asDouble Value
v = [Char] -> Double
forall a. HasCallStack => [Char] -> a
error ([Char] -> Double) -> [Char] -> Double
forall a b. (a -> b) -> a -> b
$ [Char]
"asDouble: non-number value: " [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Value -> [Char]
forall a. Show a => a -> [Char]
show Value
v

asBool :: Value -> Bool
asBool :: Value -> Bool
asBool (Bool Bool
b) = Bool
b
asBool Value
v = [Char] -> Bool
forall a. HasCallStack => [Char] -> a
error ([Char] -> Bool) -> [Char] -> Bool
forall a b. (a -> b) -> a -> b
$ [Char]
"asBool: non-bool value: " [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Value -> [Char]
forall a. Show a => a -> [Char]
show Value
v

asArray :: Value -> V.Vector Value
asArray :: Value -> Vector Value
asArray (Array Vector Value
a) = Vector Value
a
asArray Value
v = [Char] -> Vector Value
forall a. HasCallStack => [Char] -> a
error ([Char] -> Vector Value) -> [Char] -> Vector Value
forall a b. (a -> b) -> a -> b
$ [Char]
"asArray: non-array value: " [Char] -> [Char] -> [Char]
forall a. Semigroup a => a -> a -> a
<> Value -> [Char]
forall a. Show a => a -> [Char]
show Value
v