@@ -304,9 +304,31 @@ readParquet path = withBinaryFile path ReadMode $ \handle -> do
304304 when (magicString /= " PAR1" ) $ error " Invalid Parquet file"
305305
306306 metadata <- readMetadata handle size
307- print metadata
307+ -- print metadata
308+ forM_ (rowGroups metadata) $ \ r -> do
309+ forM_ (rowGroupColumns r) $ \ c -> do
310+ -- print c
311+ let metadata = columnMetaData c
312+ let colDataPageOffset = columnDataPageOffset metadata
313+ let colDictionaryPageOffset = columnDictionaryPageOffset metadata
314+ let colStart = if colDictionaryPageOffset > 0 && colDataPageOffset > colDictionaryPageOffset
315+ then colDictionaryPageOffset
316+ else colDataPageOffset
317+ let colLength = columnTotalCompressedSize metadata
318+ -- print (colStart, colLength)
319+ columnBytes <- readBytes handle colStart colLength
320+ print $ columnBytes
308321 return DI. empty
309322
-- | Read up to @colLen@ bytes from @handle@, starting at the absolute file
-- offset @colStart@, and return them as a list.
--
-- The result contains only the bytes that 'hGetBuf' reports as read, so it is
-- shorter than @colLen@ when the file ends before the requested range does.
-- A non-positive @colLen@ yields an empty list without touching the handle.
readBytes :: Handle -> Int64 -> Int64 -> IO [Word8]
readBytes handle colStart colLen
    | colLen <= 0 = pure []
    | otherwise = do
        buf <- mallocBytes (fromIntegral colLen) :: IO (Ptr Word8)
        hSeek handle AbsoluteSeek (fromIntegral colStart)
        -- hGetBuf can deliver fewer bytes than requested (e.g. at end of
        -- file). Anything in buf past that count is uninitialised, so only
        -- the bytes actually read are copied out.
        bytesRead <- hGetBuf handle buf (fromIntegral colLen)
        columnBytes <- readByteString' buf (fromIntegral bytesRead)
        -- NOTE(review): buf is not released if hSeek or hGetBuf throws;
        -- consider allocaBytes/bracket once confirmed to be in scope.
        free buf
        pure columnBytes
331+
310332numBytesInFile :: Handle -> IO Integer
311333numBytesInFile handle = do
312334 hSeek handle SeekFromEnd 0
@@ -958,6 +980,9 @@ readByteString buf pos = do
958980 size <- readVarIntFromBuffer @ Int buf pos
959981 replicateM size (readAndAdvance pos buf)
960982
-- | Collect the first @size@ bytes of @buf@ into a list, in ascending offset
-- order. Each byte is fetched with 'readSingleByte'; a @size@ of zero or less
-- produces an empty list.
readByteString' :: Ptr Word8 -> Int64 -> IO [Word8]
readByteString' buf size =
    sequence [readSingleByte offset buf | offset <- [0 .. size - 1]]
985+
961986readField :: Ptr Word8 -> IORef Int -> Int16 -> [Int16 ] -> IO (Maybe (TType , Int16 ))
962987readField buf pos lastFieldId fieldStack = do
963988 t <- readAndAdvance pos buf
@@ -979,6 +1004,9 @@ readAndAdvance bufferPos buffer = do
9791004 modifyIORef bufferPos (+ 1 )
9801005 return b
9811006
-- | Fetch the byte stored @pos@ bytes past the start of @buffer@.
readSingleByte :: Int64 -> Ptr b -> IO Word8
readSingleByte pos buffer = peekByteOff buffer byteOffset
  where
    -- peekByteOff takes its offset as an Int.
    byteOffset = fromIntegral pos
1009+
9821010readNoAdvance :: IORef Int -> Ptr b -> IO Word8
9831011readNoAdvance bufferPos buffer = do
9841012 pos <- readIORef bufferPos
0 commit comments