{-# LANGUAGE TypeApplications #-} module DataFrame.IO.JSON ( readJSON, readJSONEither, ) where import Control.Monad (forM) import Data.Aeson import qualified Data.Aeson.Key as K import qualified Data.Aeson.KeyMap as KM import qualified Data.ByteString.Lazy as LBS import Data.Maybe (catMaybes) import Data.Scientific (toRealFloat) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Vector as V import qualified DataFrame.Internal.Column as D import qualified DataFrame.Internal.DataFrame as D import qualified DataFrame.Operations.Core as D readJSONEither :: LBS.ByteString -> Either String D.DataFrame readJSONEither :: ByteString -> Either [Char] DataFrame readJSONEither ByteString bs = do Value v <- [Char] -> Maybe Value -> Either [Char] Value forall e a. e -> Maybe a -> Either e a note [Char] "Could not decode JSON" (forall a. FromJSON a => ByteString -> Maybe a decode @Value ByteString bs) Vector Object rows <- Value -> Either [Char] (Vector Object) toArrayOfObjects Value v let cols :: [Text] cols :: [Text] cols = [Text] -> [Text] forall a. Ord a => [a] -> [a] uniq ([Text] -> [Text]) -> (Vector Object -> [Text]) -> Vector Object -> [Text] forall b c a. (b -> c) -> (a -> b) -> a -> c . (Object -> [Text]) -> [Object] -> [Text] forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b] concatMap ((Key -> Text) -> [Key] -> [Text] forall a b. (a -> b) -> [a] -> [b] map Key -> Text K.toText ([Key] -> [Text]) -> (Object -> [Key]) -> Object -> [Text] forall b c a. (b -> c) -> (a -> b) -> a -> c . Object -> [Key] forall v. KeyMap v -> [Key] KM.keys) ([Object] -> [Text]) -> (Vector Object -> [Object]) -> Vector Object -> [Text] forall b c a. (b -> c) -> (a -> b) -> a -> c . Vector Object -> [Object] forall a. Vector a -> [a] V.toList (Vector Object -> [Text]) -> Vector Object -> [Text] forall a b. (a -> b) -> a -> b $ Vector Object rows [(Text, Column)] columns <- [Text] -> (Text -> Either [Char] (Text, Column)) -> Either [Char] [(Text, Column)] forall (t :: * -> *) (m :: * -> *) a b. (Traversable t, Monad m) => t a -> (a -> m b) -> m (t b) forM [Text] cols ((Text -> Either [Char] (Text, Column)) -> Either [Char] [(Text, Column)]) -> (Text -> Either [Char] (Text, Column)) -> Either [Char] [(Text, Column)] forall a b. (a -> b) -> a -> b $ \Text c -> do let col :: Column col = Vector Object -> Text -> Column buildColumn Vector Object rows Text c (Text, Column) -> Either [Char] (Text, Column) forall a. a -> Either [Char] a forall (f :: * -> *) a. Applicative f => a -> f a pure (Text c, Column col) DataFrame -> Either [Char] DataFrame forall a. a -> Either [Char] a forall (f :: * -> *) a. Applicative f => a -> f a pure (DataFrame -> Either [Char] DataFrame) -> DataFrame -> Either [Char] DataFrame forall a b. (a -> b) -> a -> b $ [(Text, Column)] -> DataFrame D.fromNamedColumns [(Text, Column)] columns readJSON :: FilePath -> IO D.DataFrame readJSON :: [Char] -> IO DataFrame readJSON [Char] path = do ByteString contents <- [Char] -> IO ByteString LBS.readFile [Char] path case ByteString -> Either [Char] DataFrame readJSONEither ByteString contents of Left [Char] err -> [Char] -> IO DataFrame forall a. [Char] -> IO a forall (m :: * -> *) a. MonadFail m => [Char] -> m a fail ([Char] -> IO DataFrame) -> [Char] -> IO DataFrame forall a b. (a -> b) -> a -> b $ [Char] "readJSON: " [Char] -> [Char] -> [Char] forall a. Semigroup a => a -> a -> a <> [Char] err Right DataFrame df -> DataFrame -> IO DataFrame forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure DataFrame df toArrayOfObjects :: Value -> Either String (V.Vector Object) toArrayOfObjects :: Value -> Either [Char] (Vector Object) toArrayOfObjects (Array Vector Value xs) | Vector Value -> Bool forall a. Vector a -> Bool V.null Vector Value xs = [Char] -> Either [Char] (Vector Object) forall a b. a -> Either a b Left [Char] "Top-level JSON array is empty" | Bool otherwise = (Value -> Either [Char] Object) -> Vector Value -> Either [Char] (Vector Object) forall (t :: * -> *) (f :: * -> *) a b. (Traversable t, Applicative f) => (a -> f b) -> t a -> f (t b) forall (f :: * -> *) a b. Applicative f => (a -> f b) -> Vector a -> f (Vector b) traverse Value -> Either [Char] Object asObject Vector Value xs toArrayOfObjects Value _ = [Char] -> Either [Char] (Vector Object) forall a b. a -> Either a b Left [Char] "Top-level JSON value must be a JSON array of objects" asObject :: Value -> Either String Object asObject :: Value -> Either [Char] Object asObject (Object Object o) = Object -> Either [Char] Object forall a b. b -> Either a b Right Object o asObject Value _ = [Char] -> Either [Char] Object forall a b. a -> Either a b Left [Char] "Expected each element of the array to be an object" uniq :: (Ord a) => [a] -> [a] uniq :: forall a. Ord a => [a] -> [a] uniq = [a] -> [a] -> [a] forall {a}. Eq a => [a] -> [a] -> [a] go [a] forall a. Monoid a => a mempty where go :: [a] -> [a] -> [a] go [a] _ [] = [] go [a] seen (a x : [a] xs) | a x a -> [a] -> Bool forall a. Eq a => a -> [a] -> Bool forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool `elem` [a] seen = [a] -> [a] -> [a] go [a] seen [a] xs | Bool otherwise = a x a -> [a] -> [a] forall a. a -> [a] -> [a] : [a] -> [a] -> [a] go (a x a -> [a] -> [a] forall a. a -> [a] -> [a] : [a] seen) [a] xs note :: e -> Maybe a -> Either e a note :: forall e a. e -> Maybe a -> Either e a note e e = Either e a -> (a -> Either e a) -> Maybe a -> Either e a forall b a. b -> (a -> b) -> Maybe a -> b maybe (e -> Either e a forall a b. a -> Either a b Left e e) a -> Either e a forall a b. b -> Either a b Right data ColType = CTString | CTNumber | CTBool | CTArray | CTMixed buildColumn :: V.Vector Object -> Text -> D.Column buildColumn :: Vector Object -> Text -> Column buildColumn Vector Object rows Text colName = let key :: Key key = Text -> Key K.fromText Text colName values :: V.Vector (Maybe Value) values :: Vector (Maybe Value) values = (Object -> Maybe Value) -> Vector Object -> Vector (Maybe Value) forall a b. (a -> b) -> Vector a -> Vector b V.map (Key -> Object -> Maybe Value forall v. Key -> KeyMap v -> Maybe v KM.lookup Key key) Vector Object rows colType :: ColType colType = Vector (Maybe Value) -> ColType detectColType Vector (Maybe Value) values in case ColType colType of ColType CTString -> Vector (Maybe Text) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column D.fromVector ((Maybe Value -> Maybe Text) -> Vector (Maybe Value) -> Vector (Maybe Text) forall a b. (a -> b) -> Vector a -> Vector b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap ((Value -> Text) -> Maybe Value -> Maybe Text forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap Value -> Text asText) Vector (Maybe Value) values) ColType CTNumber -> Vector (Maybe Double) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column D.fromVector ((Maybe Value -> Maybe Double) -> Vector (Maybe Value) -> Vector (Maybe Double) forall a b. (a -> b) -> Vector a -> Vector b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap ((Value -> Double) -> Maybe Value -> Maybe Double forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap Value -> Double asDouble) Vector (Maybe Value) values) ColType CTBool -> Vector (Maybe Bool) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column D.fromVector ((Maybe Value -> Maybe Bool) -> Vector (Maybe Value) -> Vector (Maybe Bool) forall a b. (a -> b) -> Vector a -> Vector b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap ((Value -> Bool) -> Maybe Value -> Maybe Bool forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap Value -> Bool asBool) Vector (Maybe Value) values) ColType CTArray -> Vector (Maybe (Vector Value)) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column D.fromVector ((Maybe Value -> Maybe (Vector Value)) -> Vector (Maybe Value) -> Vector (Maybe (Vector Value)) forall a b. (a -> b) -> Vector a -> Vector b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap ((Value -> Vector Value) -> Maybe Value -> Maybe (Vector Value) forall a b. (a -> b) -> Maybe a -> Maybe b forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap Value -> Vector Value asArray) Vector (Maybe Value) values) ColType CTMixed -> Vector (Maybe Value) -> Column forall a. (Columnable a, ColumnifyRep (KindOf a) a) => Vector a -> Column D.fromVector Vector (Maybe Value) values detectColType :: V.Vector (Maybe Value) -> ColType detectColType :: Vector (Maybe Value) -> ColType detectColType Vector (Maybe Value) vals = case [Value] nonMissing of [] -> ColType CTMixed [Value] vs | (Value -> Bool) -> [Value] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool all Value -> Bool isString [Value] vs -> ColType CTString | (Value -> Bool) -> [Value] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool all Value -> Bool isNumber [Value] vs -> ColType CTNumber | (Value -> Bool) -> [Value] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool all Value -> Bool isBool [Value] vs -> ColType CTBool | (Value -> Bool) -> [Value] -> Bool forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool all Value -> Bool isArray [Value] vs -> ColType CTArray | Bool otherwise -> ColType CTMixed where nonMissing :: [Value] nonMissing = [Maybe Value] -> [Value] forall a. [Maybe a] -> [a] catMaybes (Vector (Maybe Value) -> [Maybe Value] forall a. Vector a -> [a] V.toList Vector (Maybe Value) vals) isString :: Value -> Bool isString (String Text _) = Bool True isString Value _ = Bool False isNumber :: Value -> Bool isNumber (Number Scientific _) = Bool True isNumber Value _ = Bool False isBool :: Value -> Bool isBool (Bool Bool _) = Bool True isBool Value _ = Bool False isArray :: Value -> Bool isArray (Array Vector Value _) = Bool True isArray Value _ = Bool False asText :: Value -> Text asText :: Value -> Text asText (String Text s) = Text s asText Value v = [Char] -> Text T.pack (Value -> [Char] forall a. Show a => a -> [Char] show Value v) asDouble :: Value -> Double asDouble :: Value -> Double asDouble (Number Scientific s) = forall a. RealFloat a => Scientific -> a toRealFloat @Double Scientific s asDouble Value v = [Char] -> Double forall a. HasCallStack => [Char] -> a error ([Char] -> Double) -> [Char] -> Double forall a b. (a -> b) -> a -> b $ [Char] "asDouble: non-number value: " [Char] -> [Char] -> [Char] forall a. Semigroup a => a -> a -> a <> Value -> [Char] forall a. Show a => a -> [Char] show Value v asBool :: Value -> Bool asBool :: Value -> Bool asBool (Bool Bool b) = Bool b asBool Value v = [Char] -> Bool forall a. HasCallStack => [Char] -> a error ([Char] -> Bool) -> [Char] -> Bool forall a b. (a -> b) -> a -> b $ [Char] "asBool: non-bool value: " [Char] -> [Char] -> [Char] forall a. Semigroup a => a -> a -> a <> Value -> [Char] forall a. Show a => a -> [Char] show Value v asArray :: Value -> V.Vector Value asArray :: Value -> Vector Value asArray (Array Vector Value a) = Vector Value a asArray Value v = [Char] -> Vector Value forall a. HasCallStack => [Char] -> a error ([Char] -> Vector Value) -> [Char] -> Vector Value forall a b. (a -> b) -> a -> b $ [Char] "asArray: non-array value: " [Char] -> [Char] -> [Char] forall a. Semigroup a => a -> a -> a <> Value -> [Char] forall a. Show a => a -> [Char] show Value v