-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcall_graph--1.3--1.4.sql
418 lines (384 loc) · 15.5 KB
/
call_graph--1.3--1.4.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
/* call_graph/call_graph--1.3--1.4.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION UPDATE" to load this file. \quit
CREATE OR REPLACE FUNCTION call_graph_version() RETURNS text AS $$ SELECT text '1.4'; $$ LANGUAGE sql;
CREATE TABLE DailyStats(
CallGraphID bigserial NOT NULL,
Date date NOT NULL,
Calls bigint NOT NULL,
TotalTime double precision NOT NULL,
SelfTime double precision NOT NULL,
FirstCall timestamptz NOT NULL,
LastCall timestamptz NOT NULL,
PRIMARY KEY (CallGraphID, Date)
);
CREATE TABLE HourlyStats(
CallGraphID bigserial NOT NULL,
DateStamp timestamptz NOT NULL CHECK (date_trunc('hour', DateStamp) = DateStamp),
Calls bigint NOT NULL,
TotalTime double precision NOT NULL,
SelfTime double precision NOT NULL,
FirstCall timestamptz NOT NULL,
LastCall timestamptz NOT NULL,
PRIMARY KEY (CallGraphID, DateStamp)
);
-- replace ProcessCallGraphBuffers()
CREATE FUNCTION ProcessCallGraphBuffers(_MaxBufferCount bigint, _MaxHourlyStatAge interval)
RETURNS SETOF bigint
LANGUAGE plpgsql
SET search_path TO @extschema@
AS $function$
DECLARE
_MinBufferID bigint;
_CallGraphID bigint;
_GraphExists bool;
_NumGraphs int;
_ record;
BEGIN
_NumGraphs := 0;
-- The first thing we need to do is to identify the callgraph each CallGraphBufferID represents. We currently do this by
-- calculating the MD5 hash of the binary representation of an array that contains ROW(caller, callee) values ordered by
-- (caller, callee). This hash can then be used to uniquely identify each call graph easily and efficiently.
--
-- In the below query, the subquery aggregates the data for each CallGraphBufferID, calculating the hash representation of
-- the edges. At the same time, it pulls some additional data from the row where Caller = 0 (there should only ever be one
-- such row per CallGraphBufferID, so the choice of aggregate function shouldn't matter; I chose max) which is then stored
-- in the CallGraphs table. Also note that we are grouping by (CallGraphBufferID, TopLevelFunction), which is effectively
-- the same as grouping by only CallGraphBufferID; there should NEVER be more than one TopLevelFunction for a
-- CallGraphBufferID.
--
-- After the subquery is done, we aggregate the data again, this time for each (TopLevelFunction, EdgesHash) pair. This
-- way we can do the processing one callgraph at a time, rather than a CallGraphBufferID at a time.
_MinBufferID = (SELECT min(CallGraphBufferID) FROM call_graph.CallGraphBuffer);
FOR _ IN
SELECT
TopLevelFunction,
EdgesHash,
DateStampHourly,
array_agg(CallGraphBufferID) AS CallGraphBufferIDs,
SUM(Calls) AS Calls,
SUM(TotalTime) AS TotalTime,
SUM(SelfTime) AS SelfTime,
MIN(CallStamp) AS FirstCall,
MAX(CallStamp) AS LatestCall
FROM (
SELECT
CallGraphBufferID,
TopLevelFunction,
date_trunc('hour', DateStamp) AS DateStampHourly,
md5(array_send(array_agg(ROW(Caller, Callee) ORDER BY Caller, Callee))) AS EdgesHash,
MAX(CASE WHEN Caller = 0 THEN Calls END) AS Calls,
MAX(CASE WHEN Caller = 0 THEN TotalTime END) AS TotalTime,
MAX(CASE WHEN Caller = 0 THEN SelfTime END) AS SelfTime,
MAX(Datestamp) AS CallStamp
FROM
(
SELECT
CallGraphBufferID, TopLevelFunction, Caller, Callee, Calls, TotalTime, SelfTime, DateStamp
FROM
CallGraphBuffer
WHERE
CallGraphBufferID >= _MinBufferID AND CallGraphBufferID <= _MinBufferID + _MaxBufferCount
) AS Buffers
GROUP BY CallGraphBufferID, TopLevelFunction, DateStampHourly
) AS GroupedBuffers
GROUP BY TopLevelFunction, EdgesHash, DateStampHourly
LOOP
UPDATE CallGraphs SET
Calls = Calls + _.Calls,
TotalTime = TotalTime + _.TotalTime,
SelfTime = SelfTime + _.SelfTime,
FirstCall = LEAST(FirstCall, _.FirstCall),
LastCall = GREATEST(LastCall, _.LatestCall)
WHERE TopLevelFunction = _.TopLevelFunction
AND EdgesHash = _.EdgesHash
RETURNING CallGraphID INTO _CallGraphID;
IF FOUND THEN
_GraphExists := TRUE;
ELSE
INSERT INTO CallGraphs (TopLevelFunction, EdgesHash, Calls, TotalTime, SelfTime, FirstCall, LastCall)
VALUES (_.TopLevelFunction, _.EdgesHash, _.Calls, _.TotalTime, _.SelfTime, _.FirstCall, _.LatestCall)
RETURNING CallGraphID INTO _CallGraphID;
_GraphExists := FALSE;
END IF;
-- If the graph existed, all of the edges should exist too, and we can simply UPDATE them. If it didn't,
-- we need to add the edges.
--
-- Note that although we're doing multiple CallGraphBufferIDs at a time, we're only working on a single
-- call graph, so we can safely aggregate the data in CallGraphBufferSum to avoid doing multiple UPDATEs.
IF _GraphExists THEN
UPDATE Edges SET
Calls = Edges.Calls + CallGraphBufferSum.Calls,
TotalTime = Edges.TotalTime + CallGraphBufferSum.TotalTime,
SelfTime = Edges.SelfTime + CallGraphBufferSum.SelfTime
FROM (
SELECT
Caller,
Callee,
SUM(Calls) AS Calls,
SUM(TotalTime) AS TotalTime,
SUM(SelfTime) AS SelfTime
FROM CallGraphBuffer
WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs)
GROUP BY Caller, Callee
) AS CallGraphBufferSum
WHERE Edges.CallGraphID = _CallGraphID
AND Edges.Caller = CallGraphBufferSum.Caller
AND Edges.Callee = CallGraphBufferSum.Callee;
ELSE
INSERT INTO Edges (CallGraphID, Caller, Callee, Calls, TotalTime, SelfTime)
SELECT _CallGraphID, Caller, Callee, SUM(Calls), SUM(TotalTime), SUM(SelfTime)
FROM CallGraphBuffer
WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs)
GROUP BY Caller, Callee;
END IF;
UPDATE DailyStats SET
Calls = Calls + _.Calls,
TotalTime = TotalTime + _.TotalTime,
SelfTime = SelfTime + _.SelfTime,
FirstCall = LEAST(FirstCall, _.FirstCall),
LastCall = GREATEST(LastCall, _.LatestCall)
WHERE CallGraphID = _CallGraphID
AND date = (_.DateStampHourly)::date;
IF NOT FOUND THEN
INSERT INTO DailyStats (CallGraphID, Date, Calls, TotalTime, SelfTime, FirstCall, LastCall)
VALUES (_CallGraphID, (_.DateStampHourly)::date, _.Calls, _.TotalTime, _.SelfTime, _.FirstCall, _.LatestCall);
END IF;
UPDATE HourlyStats SET
Calls = Calls + _.Calls,
TotalTime = TotalTime + _.TotalTime,
SelfTime = SelfTime + _.SelfTime,
FirstCall = LEAST(FirstCall, _.FirstCall),
LastCall = GREATEST(LastCall, _.LatestCall)
WHERE CallGraphID = _CallGraphID
AND DateStamp = _.DateStampHourly;
IF NOT FOUND THEN
INSERT INTO HourlyStats (CallGraphID, DateStamp, Calls, TotalTime, SelfTime, FirstCall, LastCall)
VALUES (_CallGraphID, _.DateStampHourly, _.Calls, _.TotalTime, _.SelfTime, _.FirstCall, _.LatestCall);
END IF;
-- This function will get called multiple times a day, so try not to clean
-- CallGraphDailyStats on every call. Random() should do fine here since
-- it's not a big deal even if we don't do the cleanup at all during one
-- day (or if we do it several times a day).
IF random() < 0.05 THEN
DELETE FROM HourlyStats WHERE DateStamp < now() - _MaxHourlyStatAge;
END IF;
DELETE FROM CallGraphBuffer WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs);
WITH Buffers AS (
DELETE FROM
TableAccessBuffer
WHERE
CallGraphBufferID = ANY(_.CallGraphBufferIDs)
RETURNING
relid, seq_scan, seq_tup_read, idx_scan, idx_tup_read,
n_tup_ins, n_tup_upd, n_tup_del
),
GroupedBuffers AS (
SELECT
relid, sum(seq_scan) AS seq_scan, sum(seq_tup_read) AS seq_tup_read,
sum(idx_scan) AS idx_scan, sum(idx_tup_read) AS idx_tup_read,
sum(n_tup_ins) AS n_tup_ins, sum(n_tup_upd) AS n_tup_upd,
sum(n_tup_del) AS n_tup_del
FROM
Buffers
GROUP BY
relid
),
UpdateExisting AS (
UPDATE
TableUsage tu
SET
seq_scan = tu.seq_scan + buf.seq_scan,
seq_tup_read = tu.seq_tup_read + buf.seq_tup_read,
idx_scan = tu.idx_scan + buf.idx_scan,
idx_tup_read = tu.idx_tup_read + buf.idx_tup_read,
n_tup_ins = tu.n_tup_ins + buf.n_tup_ins,
n_tup_upd = tu.n_tup_upd + buf.n_tup_upd,
n_tup_del = tu.n_tup_del + buf.n_tup_del
FROM
GroupedBuffers buf
WHERE
tu.CallGraphID = _CallGraphID AND
tu.relid = buf.relid
)
INSERT INTO
TableUsage (CallGraphID, relid, seq_scan, seq_tup_read, idx_scan, idx_tup_read,
n_tup_ins, n_tup_upd, n_tup_del)
SELECT
_CallGraphID, relid, seq_scan, seq_tup_read, idx_scan, idx_tup_read,
n_tup_ins, n_tup_upd, n_tup_del
FROM
GroupedBuffers buf
WHERE
NOT EXISTS
(SELECT * FROM TableUsage tu
WHERE tu.CallGraphID = _CallGraphID AND tu.relid = buf.relid);
RETURN NEXT _CallGraphID;
END LOOP;
RETURN;
END;
$function$
;
CREATE OR REPLACE FUNCTION ProcessCallGraphBuffers(_MaxBufferCount bigint)
RETURNS SETOF bigint
LANGUAGE plpgsql
SET search_path TO @extschema@
AS $function$
DECLARE
_MinBufferID bigint;
_CallGraphID bigint;
_GraphExists bool;
_NumGraphs int;
_ record;
BEGIN
_NumGraphs := 0;
-- The first thing we need to do is to identify the callgraph each CallGraphBufferID represents. We currently do this by
-- calculating the MD5 hash of the binary representation of an array that contains ROW(caller, callee) values ordered by
-- (caller, callee). This hash can then be used to uniquely identify each call graph easily and efficiently.
--
-- In the below query, the subquery aggregates the data for each CallGraphBufferID, calculating the hash representation of
-- the edges. At the same time, it pulls some additional data from the row where Caller = 0 (there should only ever be one
-- such row per CallGraphBufferID, so the choice of aggregate function shouldn't matter; I chose max) which is then stored
-- in the CallGraphs table. Also note that we are grouping by (CallGraphBufferID, TopLevelFunction), which is effectively
-- the same as grouping by only CallGraphBufferID; there should NEVER be more than one TopLevelFunction for a
-- CallGraphBufferID.
--
-- After the subquery is done, we aggregate the data again, this time for each (TopLevelFunction, EdgesHash) pair. This
-- way we can do the processing one callgraph at a time, rather than a CallGraphBufferID at a time.
_MinBufferID = (SELECT min(CallGraphBufferID) FROM call_graph.CallGraphBuffer);
FOR _ IN
SELECT
TopLevelFunction,
EdgesHash,
array_agg(CallGraphBufferID) AS CallGraphBufferIDs,
SUM(Calls) AS Calls,
SUM(TotalTime) AS TotalTime,
SUM(SelfTime) AS SelfTime,
MIN(CallStamp) AS FirstCall,
MAX(CallStamp) AS LatestCall
FROM (
SELECT
CallGraphBufferID,
TopLevelFunction,
md5(array_send(array_agg(ROW(Caller, Callee) ORDER BY Caller, Callee))) AS EdgesHash,
MAX(CASE WHEN Caller = 0 THEN Calls END) AS Calls,
MAX(CASE WHEN Caller = 0 THEN TotalTime END) AS TotalTime,
MAX(CASE WHEN Caller = 0 THEN SelfTime END) AS SelfTime,
MAX(Datestamp) AS CallStamp
FROM
(
SELECT
CallGraphBufferID, TopLevelFunction, Caller, Callee, Calls, TotalTime, SelfTime, DateStamp
FROM
CallGraphBuffer
WHERE
CallGraphBufferID >= _MinBufferID AND CallGraphBufferID <= _MinBufferID + _MaxBufferCount
) AS Buffers
GROUP BY CallGraphBufferID, TopLevelFunction
) AS GroupedBuffers
GROUP BY TopLevelFunction, EdgesHash
LOOP
UPDATE CallGraphs SET
Calls = Calls + _.Calls,
TotalTime = TotalTime + _.TotalTime,
SelfTime = SelfTime + _.SelfTime,
FirstCall = LEAST(FirstCall, _.FirstCall),
LastCall = GREATEST(LastCall, _.LatestCall)
WHERE TopLevelFunction = _.TopLevelFunction
AND EdgesHash = _.EdgesHash
RETURNING CallGraphID INTO _CallGraphID;
IF FOUND THEN
_GraphExists := TRUE;
ELSE
INSERT INTO CallGraphs (TopLevelFunction, EdgesHash, Calls, TotalTime, SelfTime, FirstCall, LastCall)
VALUES (_.TopLevelFunction, _.EdgesHash, _.Calls, _.TotalTime, _.SelfTime, _.FirstCall, _.LatestCall)
RETURNING CallGraphID INTO _CallGraphID;
_GraphExists := FALSE;
END IF;
-- If the graph existed, all of the edges should exist too, and we can simply UPDATE them. If it didn't,
-- we need to add the edges.
--
-- Note that although we're doing multiple CallGraphBufferIDs at a time, we're only working on a single
-- call graph, so we can safely aggregate the data in CallGraphBufferSum to avoid doing multiple UPDATEs.
IF _GraphExists THEN
UPDATE Edges SET
Calls = Edges.Calls + CallGraphBufferSum.Calls,
TotalTime = Edges.TotalTime + CallGraphBufferSum.TotalTime,
SelfTime = Edges.SelfTime + CallGraphBufferSum.SelfTime
FROM (
SELECT
Caller,
Callee,
SUM(Calls) AS Calls,
SUM(TotalTime) AS TotalTime,
SUM(SelfTime) AS SelfTime
FROM CallGraphBuffer
WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs)
GROUP BY Caller, Callee
) AS CallGraphBufferSum
WHERE Edges.CallGraphID = _CallGraphID
AND Edges.Caller = CallGraphBufferSum.Caller
AND Edges.Callee = CallGraphBufferSum.Callee;
ELSE
INSERT INTO Edges (CallGraphID, Caller, Callee, Calls, TotalTime, SelfTime)
SELECT _CallGraphID, Caller, Callee, SUM(Calls), SUM(TotalTime), SUM(SelfTime)
FROM CallGraphBuffer
WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs)
GROUP BY Caller, Callee;
END IF;
DELETE FROM CallGraphBuffer WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs);
WITH Buffers AS (
DELETE FROM
TableAccessBuffer
WHERE
CallGraphBufferID = ANY(_.CallGraphBufferIDs)
RETURNING
relid, seq_scan, seq_tup_read, idx_scan, idx_tup_read,
n_tup_ins, n_tup_upd, n_tup_del
),
GroupedBuffers AS (
SELECT
relid, sum(seq_scan) AS seq_scan, sum(seq_tup_read) AS seq_tup_read,
sum(idx_scan) AS idx_scan, sum(idx_tup_read) AS idx_tup_read,
sum(n_tup_ins) AS n_tup_ins, sum(n_tup_upd) AS n_tup_upd,
sum(n_tup_del) AS n_tup_del
FROM
Buffers
GROUP BY
relid
),
UpdateExisting AS (
UPDATE
TableUsage tu
SET
seq_scan = tu.seq_scan + buf.seq_scan,
seq_tup_read = tu.seq_tup_read + buf.seq_tup_read,
idx_scan = tu.idx_scan + buf.idx_scan,
idx_tup_read = tu.idx_tup_read + buf.idx_tup_read,
n_tup_ins = tu.n_tup_ins + buf.n_tup_ins,
n_tup_upd = tu.n_tup_upd + buf.n_tup_upd,
n_tup_del = tu.n_tup_del + buf.n_tup_del
FROM
GroupedBuffers buf
WHERE
tu.CallGraphID = _CallGraphID AND
tu.relid = buf.relid
)
INSERT INTO
TableUsage (CallGraphID, relid, seq_scan, seq_tup_read, idx_scan, idx_tup_read,
n_tup_ins, n_tup_upd, n_tup_del)
SELECT
_CallGraphID, relid, seq_scan, seq_tup_read, idx_scan, idx_tup_read,
n_tup_ins, n_tup_upd, n_tup_del
FROM
GroupedBuffers buf
WHERE
NOT EXISTS
(SELECT * FROM TableUsage tu
WHERE tu.CallGraphID = _CallGraphID AND tu.relid = buf.relid);
RETURN NEXT _CallGraphID;
END LOOP;
RETURN;
END;
$function$
;