-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcall_graph--1.1--1.2.sql
152 lines (136 loc) · 5.92 KB
/
call_graph--1.1--1.2.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/* call_graph/call_graph--1.1--1.2.sql */
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION UPDATE" to load this file. \quit
CREATE OR REPLACE FUNCTION call_graph_version() RETURNS text AS $$ SELECT text '1.2'; $$ LANGUAGE sql;
DROP INDEX CallGraphBuffer_CallGraphBufferID_Index;
CREATE INDEX CallGraphBuffer_CallGraphBufferID_TopLevelFunction_Index ON CallGraphBuffer(CallGraphBufferID, TopLevelFunction);
-- Replace ProcessCallGraphBuffers(). The function signature changed enough from 1.1 to
-- 1.2 that we have to drop the previous one.
DROP FUNCTION ProcessCallGraphBuffers();
CREATE OR REPLACE FUNCTION ProcessCallGraphBuffers(_MaxBufferCount bigint)
RETURNS SETOF bigint
LANGUAGE plpgsql
SET search_path TO @extschema@
AS $function$
DECLARE
_MinBufferID bigint;
_CallGraphID bigint;
_GraphExists bool;
_NumGraphs int;
_ record;
BEGIN
_NumGraphs := 0;
-- The first thing we need to do is to identify the callgraph each CallGraphBufferID represents. We currently do this by
-- calculating the MD5 hash of the binary representation of an array that contains ROW(caller, callee) values ordered by
-- (caller, callee). This hash can then be used to uniquely identify each call graph easily and efficiently.
--
-- In the below query, the subquery aggregates the data for each CallGraphBufferID, calculating the hash representation of
-- the edges. At the same time, it pulls some additional data from the row where Caller = 0 (there should only ever be one
-- such row per CallGraphBufferID, so the choice of aggregate function shouldn't matter; I chose max) which is then stored
-- in the CallGraphs table. Also note that we are grouping by (CallGraphBufferID, TopLevelFunction), which is effectively
-- the same as grouping by only CallGraphBufferID; there should NEVER be more than one TopLevelFunction for a
-- CallGraphBufferID.
--
-- After the subquery is done, we aggregate the data again, this time for each (TopLevelFunction, EdgesHash) pair. This
-- way we can do the processing one callgraph at a time, rather than a CallGraphBufferID at a time.
_MinBufferID = (SELECT min(CallGraphBufferID) FROM call_graph.CallGraphBuffer);
FOR _ IN
SELECT
TopLevelFunction,
EdgesHash,
array_agg(CallGraphBufferID) AS CallGraphBufferIDs,
SUM(Calls) AS Calls,
SUM(TotalTime) AS TotalTime,
SUM(SelfTime) AS SelfTime,
MIN(CallStamp) AS FirstCall,
MAX(CallStamp) AS LatestCall
FROM (
SELECT
CallGraphBufferID,
TopLevelFunction,
md5(array_send(array_agg(ROW(Caller, Callee) ORDER BY Caller, Callee))) AS EdgesHash,
MAX(CASE WHEN Caller = 0 THEN Calls END) AS Calls,
MAX(CASE WHEN Caller = 0 THEN TotalTime END) AS TotalTime,
MAX(CASE WHEN Caller = 0 THEN SelfTime END) AS SelfTime,
MAX(Datestamp) AS CallStamp
FROM
(
SELECT
CallGraphBufferID, TopLevelFunction, Caller, Callee, Calls, TotalTime, SelfTime, DateStamp
FROM
CallGraphBuffer
WHERE
CallGraphBufferID >= _MinBufferID AND CallGraphBufferID <= _MinBufferID + _MaxBufferCount
) AS Buffers
GROUP BY CallGraphBufferID, TopLevelFunction
) AS GroupedBuffers
GROUP BY TopLevelFunction, EdgesHash
LOOP
UPDATE CallGraphs SET
Calls = Calls + _.Calls,
TotalTime = TotalTime + _.TotalTime,
SelfTime = SelfTime + _.SelfTime,
LastCall = _.LatestCall
WHERE TopLevelFunction = _.TopLevelFunction
AND EdgesHash = _.EdgesHash
RETURNING CallGraphID INTO _CallGraphID;
IF FOUND THEN
_GraphExists := TRUE;
ELSE
INSERT INTO CallGraphs (TopLevelFunction, EdgesHash, Calls, TotalTime, SelfTime, FirstCall, LastCall)
VALUES (_.TopLevelFunction, _.EdgesHash, _.Calls, _.TotalTime, _.SelfTime, _.FirstCall, _.LatestCall)
RETURNING CallGraphID INTO _CallGraphID;
_GraphExists := FALSE;
END IF;
-- If the graph existed, all of the edges should exist too, and we can simply UPDATE them. If it didn't,
-- we need to add the edges.
--
-- Note that although we're doing multiple CallGraphBufferIDs at a time, we're only working on a single
-- call graph, so we can safely aggregate the data in CallGraphBufferSum to avoid doing multiple UPDATEs.
IF _GraphExists THEN
UPDATE Edges SET
Calls = Edges.Calls + CallGraphBufferSum.Calls,
TotalTime = Edges.TotalTime + CallGraphBufferSum.TotalTime,
SelfTime = Edges.SelfTime + CallGraphBufferSum.SelfTime
FROM (
SELECT
Caller,
Callee,
SUM(Calls) AS Calls,
SUM(TotalTime) AS TotalTime,
SUM(SelfTime) AS SelfTime
FROM CallGraphBuffer
WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs)
GROUP BY Caller, Callee
) AS CallGraphBufferSum
WHERE Edges.CallGraphID = _CallGraphID
AND Edges.Caller = CallGraphBufferSum.Caller
AND Edges.Callee = CallGraphBufferSum.Callee;
ELSE
INSERT INTO Edges (CallGraphID, Caller, Callee, Calls, TotalTime, SelfTime)
SELECT _CallGraphID, Caller, Callee, SUM(Calls), SUM(TotalTime), SUM(SelfTime)
FROM CallGraphBuffer
WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs)
GROUP BY Caller, Callee;
END IF;
DELETE FROM CallGraphBuffer WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs);
-- We can process the table usage buffers in one go since we don't (currently) need to UPDATE any
-- previously existing values.
WITH Buffers AS (
DELETE FROM TableAccessBuffer
WHERE CallGraphBufferID = ANY(_.CallGraphBufferIDs)
RETURNING CallGraphBufferID, relid
)
INSERT INTO TableUsage (CallGraphID, relid)
SELECT DISTINCT _CallGraphID, relid
FROM Buffers tub
WHERE
NOT EXISTS
(SELECT * FROM TableUsage tu
WHERE tu.CallGraphID = _CallGraphID AND tu.relid = tub.relid);
RETURN NEXT _CallGraphID;
END LOOP;
RETURN;
END;
$function$
;