-
Notifications
You must be signed in to change notification settings - Fork 220
Expand file tree
/
Copy pathState.hs
More file actions
293 lines (248 loc) · 11.4 KB
/
State.hs
File metadata and controls
293 lines (248 loc) · 11.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
{-# LANGUAGE TemplateHaskell, StandaloneDeriving, GeneralizedNewtypeDeriving,
DeriveDataTypeable, TypeFamilies, FlexibleInstances,
MultiParamTypeClasses, BangPatterns #-}
module Distribution.Server.Features.DownloadCount.State where
import Data.Time.Calendar (Day(..))
import Data.Foldable (forM_)
import Control.Arrow (first)
import Control.Monad (liftM)
import Data.List (foldl', groupBy)
import Data.Function (on)
import Control.Monad.Reader (ask, asks)
import Control.Monad.State (get, put)
import qualified Data.Map.Lazy as Map
import System.FilePath ((</>))
import System.Directory (
getDirectoryContents
, createDirectoryIfMissing
)
import qualified Data.ByteString.Lazy as BSL
import System.IO (withFile, IOMode (..))
import System.IO.Unsafe (unsafeInterleaveIO)
import Text.CSV (printCSV)
import Control.Exception (evaluate)
import Data.Acid (Update, Query, makeAcidic)
import Data.SafeCopy (base, deriveSafeCopy, safeGet, safePut)
import Data.Serialize.Get (runGetLazy)
import Data.Serialize.Put (runPutLazy)
import Distribution.Version (Version)
import Distribution.Package (
PackageId
, PackageName
, packageName
, packageVersion
)
import Distribution.Text (simpleParse, display)
import Distribution.Simple.Utils (writeFileAtomic)
import Distribution.Server.Framework.Instances ()
import Distribution.Server.Framework.MemSize
import Distribution.Server.Util.CountingMap
{------------------------------------------------------------------------------
Data types
------------------------------------------------------------------------------}
data InMemStats = InMemStats {
inMemToday :: !Day
, inMemCounts :: !(SimpleCountingMap PackageId)
}
deriving (Show, Eq)
newtype OnDiskStats = OnDiskStats {
onDiskStats :: NestedCountingMap PackageName OnDiskPerPkg
}
deriving (Show, Eq, MemSize)
instance CountingMap (PackageName, (Day, Version)) OnDiskStats where
cmEmpty = OnDiskStats cmEmpty
cmTotal (OnDiskStats ncm) = cmTotal ncm
cmInsert kl n (OnDiskStats ncm) = OnDiskStats $ cmInsert kl n ncm
cmFind k (OnDiskStats ncm) = cmFind k ncm
cmUnion (OnDiskStats a)
(OnDiskStats b) = OnDiskStats (cmUnion a b)
cmToList (OnDiskStats ncm) = cmToList ncm
cmToCSV (OnDiskStats ncm) = cmToCSV ncm
cmInsertRecord r (OnDiskStats ncm) = first OnDiskStats `liftM` cmInsertRecord r ncm
newtype OnDiskPerPkg = OnDiskPerPkg {
onDiskPerPkgCounts :: NestedCountingMap Day (SimpleCountingMap Version)
}
deriving (Show, Eq, Ord, MemSize)
instance CountingMap (Day, Version) OnDiskPerPkg where
cmEmpty = OnDiskPerPkg cmEmpty
cmTotal (OnDiskPerPkg ncm) = cmTotal ncm
cmInsert kl n (OnDiskPerPkg ncm) = OnDiskPerPkg $ cmInsert kl n ncm
cmFind k (OnDiskPerPkg ncm) = cmFind k ncm
cmUnion (OnDiskPerPkg a) (OnDiskPerPkg b) = OnDiskPerPkg (cmUnion a b)
cmToList (OnDiskPerPkg ncm) = cmToList ncm
cmToCSV (OnDiskPerPkg ncm) = cmToCSV ncm
cmInsertRecord r (OnDiskPerPkg ncm) = first OnDiskPerPkg `liftM` cmInsertRecord r ncm
newtype RecentDownloads = RecentDownloads {
recentDownloads :: SimpleCountingMap PackageName
}
deriving (Show, Eq, MemSize)
instance CountingMap PackageName RecentDownloads where
cmEmpty = RecentDownloads cmEmpty
cmTotal (RecentDownloads ncm) = cmTotal ncm
cmInsert kl n (RecentDownloads ncm) = RecentDownloads $ cmInsert kl n ncm
cmFind k (RecentDownloads ncm) = cmFind k ncm
cmUnion (RecentDownloads a) (RecentDownloads b) = RecentDownloads (cmUnion a b)
cmToList (RecentDownloads ncm) = cmToList ncm
cmToCSV (RecentDownloads ncm) = cmToCSV ncm
cmInsertRecord r (RecentDownloads ncm) = first RecentDownloads `liftM` cmInsertRecord r ncm
newtype TotalDownloads = TotalDownloads {
totalDownloads :: SimpleCountingMap PackageName
}
deriving (Show, Eq, MemSize)
instance CountingMap PackageName TotalDownloads where
cmEmpty = TotalDownloads cmEmpty
cmTotal (TotalDownloads ncm) = cmTotal ncm
cmInsert kl n (TotalDownloads ncm) = TotalDownloads $ cmInsert kl n ncm
cmFind k (TotalDownloads ncm) = cmFind k ncm
cmUnion (TotalDownloads a) (TotalDownloads b) = TotalDownloads (cmUnion a b)
cmToList (TotalDownloads ncm) = cmToList ncm
cmToCSV (TotalDownloads ncm) = cmToCSV ncm
cmInsertRecord r (TotalDownloads ncm) = first TotalDownloads `liftM` cmInsertRecord r ncm
{------------------------------------------------------------------------------
Initial instances
------------------------------------------------------------------------------}
initInMemStats :: Day -> InMemStats
initInMemStats day = InMemStats {
inMemToday = day
, inMemCounts = cmEmpty
}
type DayRange = (Day, Day)
initRecentAndTotalDownloads :: DayRange -> OnDiskStats
-> (RecentDownloads, TotalDownloads)
initRecentAndTotalDownloads dayRange (OnDiskStats (NCM _ m)) =
foldl' (\(recent, total) (pname, pstats) ->
let !recent' = accumRecentDownloads dayRange pname pstats recent
!total' = accumTotalDownloads pname pstats total
in (recent', total'))
(emptyRecentDownloads, emptyTotalDownloads)
(Map.toList m)
emptyRecentDownloads :: RecentDownloads
emptyRecentDownloads = RecentDownloads cmEmpty
accumRecentDownloads :: DayRange
-> PackageName -> OnDiskPerPkg
-> RecentDownloads -> RecentDownloads
accumRecentDownloads dayRange pkgName (OnDiskPerPkg (NCM _ perDay))
| let rangeTotal = sum (map cmTotal (lookupRange dayRange perDay))
, rangeTotal > 0
= cmInsert pkgName rangeTotal
| otherwise = id
lookupRange :: Ord k => (k,k) -> Map.Map k a -> [a]
lookupRange (l,u) m =
let (_,ml,above) = Map.splitLookup l m
(middle,mu,_) = Map.splitLookup u above
in maybe [] (\x->[x]) ml
++ Map.elems middle
++ maybe [] (\x->[x]) mu
emptyTotalDownloads :: TotalDownloads
emptyTotalDownloads = TotalDownloads cmEmpty
accumTotalDownloads :: PackageName -> OnDiskPerPkg
-> TotalDownloads -> TotalDownloads
accumTotalDownloads pkgName (OnDiskPerPkg perPkg) =
cmInsert pkgName (cmTotal perPkg)
{------------------------------------------------------------------------------
Pure updates/queries
------------------------------------------------------------------------------}
updateHistory :: InMemStats -> OnDiskStats -> OnDiskStats
updateHistory (InMemStats day perPkg) (OnDiskStats (NCM _ m)) =
OnDiskStats (NCM 0 (Map.unionWith cmUnion m updatesMap))
where
updatesMap :: Map.Map PackageName OnDiskPerPkg
updatesMap = Map.fromList
[ (pkgname, applyUpdates pkgs)
| pkgs <- groupBy ((==) `on` (packageName . fst))
(cmToList perPkg :: [(PackageId, Int)])
, let pkgname = packageName (fst (head pkgs))
]
applyUpdates :: [(PackageId, Int)] -> OnDiskPerPkg
applyUpdates pkgs = foldr (.) id
[ cmInsert (day, packageVersion pkgId) count
| (pkgId, count) <- pkgs ]
cmEmpty
{------------------------------------------------------------------------------
MemSize
------------------------------------------------------------------------------}
instance MemSize InMemStats where
memSize (InMemStats a b) = memSize2 a b
{------------------------------------------------------------------------------
Serializing on-disk stats
------------------------------------------------------------------------------}
deriveSafeCopy 0 'base ''InMemStats
deriveSafeCopy 0 'base ''OnDiskPerPkg
-- | This processes all of the files. If you're only interested in a
-- couple of them, use 'readOnDiskStatsLazily' instead, to avoid most
-- of the IO.
readOnDiskStatsEagerly :: FilePath -> IO OnDiskStats
readOnDiskStatsEagerly = readOnDiskStatsHelper False
-- | Compared to 'readOnDiskStatsEagerly', this defers opening and
-- processing the files; each file will be opened and processed when
-- the corresponding entry in the result map is forced.
--
-- It is therefore perhaps unwise to call this function and then
-- quickly force every entry of the map. It's not easy to predict
-- whether it will open many files before closing any of them; might
-- depend on the behaviors of the threaded RTS\/the platform\/etc.
--
-- So if you're going to quickly force many of the packages' entries
-- in the map, you could instead call 'readOnDiskStatsEagerly' to
-- avoid any risk of exhausing file descriptions: every file will be
-- closed before the next is opened.
--
-- But we haven't /actually demonstrated/ 'readOnDiskStatsLazily'
-- risks exhausting file descriptors; we merely just suspect it
-- might. See the FdExhaustion-DownloadCount-Hypothesis
-- benchmark. Thus, for now, the rest of the codebase continues to use
-- only 'readOnDiskStateLazily', since that's what it used before we
-- started investigating.
readOnDiskStatsLazily :: FilePath -> IO OnDiskStats
readOnDiskStatsLazily = readOnDiskStatsHelper True
-- | See 'readOnDiskStatsLazily' and 'readOnDiskStatsEagerly'.
readOnDiskStatsHelper :: Bool -> FilePath -> IO OnDiskStats
readOnDiskStatsHelper whetherToInterleaveIO stateDir = do
createDirectoryIfMissing True stateDir
pkgStrs <- getDirectoryContents stateDir
OnDiskStats . NCM 0 . Map.fromList <$> sequence
[ do onDiskPerPkg <- (if whetherToInterleaveIO then unsafeInterleaveIO else id) $
either (const cmEmpty) id
<$> readOnDiskPerPkg pkgFile
return (pkgName, onDiskPerPkg)
| Just pkgName <- map simpleParse pkgStrs
, let pkgFile = stateDir </> display pkgName ]
readOnDiskPerPkg :: FilePath -> IO (Either String OnDiskPerPkg)
readOnDiskPerPkg pkgFile =
withFile pkgFile ReadMode $ \h ->
-- By evaluating the Either result from the parser we force
-- all contents to be read
evaluate =<< (runGetLazy safeGet <$> BSL.hGetContents h)
writeOnDiskStats :: FilePath -> OnDiskStats -> IO ()
writeOnDiskStats stateDir (OnDiskStats (NCM _ onDisk)) = do
createDirectoryIfMissing True stateDir
forM_ (Map.toList onDisk) $ \(pkgName, perPkg) -> do
let pkgFile = stateDir </> display pkgName
writeFileAtomic pkgFile $ runPutLazy (safePut perPkg)
{------------------------------------------------------------------------------
The append-only all-time log
------------------------------------------------------------------------------}
appendToLog :: FilePath -> InMemStats -> IO ()
appendToLog stateDir (InMemStats _ inMemStats) =
appendFile (stateDir </> "log") $ printCSV (cmToCSV inMemStats)
reconstructLog :: FilePath -> OnDiskStats -> IO ()
reconstructLog stateDir onDisk =
writeFile (stateDir </> "log") $ printCSV (cmToCSV onDisk)
{------------------------------------------------------------------------------
ACID stuff
------------------------------------------------------------------------------}
getInMemStats :: Query InMemStats InMemStats
getInMemStats = ask
replaceInMemStats :: InMemStats -> Update InMemStats ()
replaceInMemStats = put
recordedToday :: Query InMemStats Day
recordedToday = asks inMemToday
registerDownload :: PackageId -> Update InMemStats ()
registerDownload pkgId = do
InMemStats day counts <- get
put $ InMemStats day (cmInsert pkgId 1 counts)
makeAcidic ''InMemStats [ 'getInMemStats
, 'replaceInMemStats
, 'recordedToday
, 'registerDownload
]