--------------------------------------------------------------------- -- T-SQL Querying and Query Tuning for Data Analysis and Beyond -- © Itzik Ben-Gan --------------------------------------------------------------------- SET NOCOUNT ON; USE TSQLV6; -- https://itziktsql.com/r-downloads --------------------------------------------------------------------- -- Window Functions --------------------------------------------------------------------- --------------------------------------------------------------------- -- Aggregate Window Functions --------------------------------------------------------------------- --------------------------------------------------------------------- -- Described --------------------------------------------------------------------- SELECT empid, ordermonth, qty, SUM(qty) OVER(PARTITION BY empid ORDER BY ordermonth ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS runqty FROM Sales.EmpOrders; --------------------------------------------------------------------- -- Window Partition Clause --------------------------------------------------------------------- SELECT orderid, custid, val, CAST(100. * val / SUM(val) OVER() AS NUMERIC(5, 2)) AS pctall, CAST(100. * val / SUM(val) OVER(PARTITION BY custid) AS NUMERIC(5, 2)) AS pctcust FROM Sales.OrderValues; --------------------------------------------------------------------- -- Window Frame --------------------------------------------------------------------- --------------------------------------------------------------------- -- Window Frame Unit: ROWS --------------------------------------------------------------------- -- running totals with window function SELECT empid, ordermonth, qty, SUM(qty) OVER(PARTITION BY empid ORDER BY ordermonth ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS runqty FROM Sales.EmpOrders; -- with join (very slow!) SELECT O1.empid, O1.ordermonth, O1.qty, SUM(O2.qty) AS runqty FROM Sales.EmpOrders AS O1 INNER JOIN Sales.EmpOrders AS O2 ON O2.empid = O1.empid AND O2.ordermonth <= O1.ordermonth GROUP BY O1.empid, O1.ordermonth, O1.qty; -- different frame options SELECT empid, ordermonth, qty AS curqty, AVG(qty) OVER(PARTITION BY empid ORDER BY ordermonth ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS avgqty FROM Sales.EmpOrders; --------------------------------------------------------------------- -- Window Frame Unit: RANGE --------------------------------------------------------------------- -- standard query, not supported yet in SQL Server /* SELECT empid, ordermonth, qty, SUM(qty) OVER(PARTITION BY empid ORDER BY ordermonth RANGE BETWEEN INTERVAL '2' MONTH PRECEDING AND CURRENT ROW) AS sum3month FROM Sales.EmpOrders; */ -- alternative in SQL Server SELECT O1.empid, O1.ordermonth, O1.qty, SUM(O2.qty) AS sum3month FROM Sales.EmpOrders AS O1 INNER JOIN Sales.EmpOrders AS O2 ON O2.empid = O1.empid AND O2.ordermonth BETWEEN DATEADD(month, -2, O1.ordermonth) AND O1.ordermonth GROUP BY O1.empid, O1.ordermonth, O1.qty; --------------------------------------------------------------------- -- Optimization --------------------------------------------------------------------- -- sample data IF OBJECT_ID('dbo.Transactions', 'U') IS NOT NULL DROP TABLE dbo.Transactions; IF OBJECT_ID('dbo.Accounts', 'U') IS NOT NULL DROP TABLE dbo.Accounts; CREATE TABLE dbo.Accounts ( actid INT NOT NULL CONSTRAINT PK_Accounts PRIMARY KEY ); CREATE TABLE dbo.Transactions ( actid INT NOT NULL, tranid INT NOT NULL, val MONEY NOT NULL, CONSTRAINT PK_Transactions PRIMARY KEY(actid, tranid) -- creates POC index ); DECLARE @num_partitions AS INT = 100, @rows_per_partition AS INT = 20000; INSERT INTO dbo.Accounts WITH (TABLOCK) (actid) SELECT NP.n FROM dbo.GetNums(1, @num_partitions) AS NP; INSERT INTO dbo.Transactions WITH (TABLOCK) (actid, tranid, val) SELECT NP.n, RPP.n, (ABS(CHECKSUM(NEWID())%2)*2-1) * (1 + ABS(CHECKSUM(NEWID())%5)) FROM dbo.GetNums(1, @num_partitions) AS NP CROSS JOIN dbo.GetNums(1, @rows_per_partition) AS RPP; GO -- query -- To test pre-2019, set compatibility level to 140 ALTER DATABASE CURRENT SET COMPATIBILITY_LEVEL = 140; SELECT actid, tranid, val, SUM(val) OVER(PARTITION BY actid ORDER BY tranid ROWS UNBOUNDED PRECEDING) AS balance FROM dbo.Transactions; -- Batch mode Window Aggregate operator -- Without batch mode on rowstore at least one columnstore index must be present CREATE NONCLUSTERED COLUMNSTORE INDEX idx_cs_dummy ON dbo.Transactions(actid) WHERE actid = -1 AND actid = -2; SELECT actid, tranid, val, SUM(val) OVER(PARTITION BY actid ORDER BY tranid ROWS UNBOUNDED PRECEDING) AS balance FROM dbo.Transactions; -- Batch mode on rowstore, compat level >= 150 ALTER DATABASE CURRENT SET COMPATIBILITY_LEVEL = 150; DROP INDEX IF EXISTS idx_cs_dummy ON dbo.Transactions; SELECT actid, tranid, val, SUM(val) OVER(PARTITION BY actid ORDER BY tranid ROWS UNBOUNDED PRECEDING) AS balance FROM dbo.Transactions; -- cleanup IF OBJECT_ID('dbo.Transactions', 'U') IS NOT NULL DROP TABLE dbo.Transactions; IF OBJECT_ID('dbo.Accounts', 'U') IS NOT NULL DROP TABLE dbo.Accounts; ALTER DATABASE CURRENT SET COMPATIBILITY_LEVEL = 160; -- 2022 --------------------------------------------------------------------- -- Ranking Window Functions --------------------------------------------------------------------- -- Creating and populating the Orders table SET NOCOUNT ON; USE tempdb; IF OBJECT_ID('dbo.Orders') IS NOT NULL DROP TABLE dbo.Orders; CREATE TABLE dbo.Orders ( orderid INT NOT NULL, orderdate DATE NOT NULL, empid INT NOT NULL, custid VARCHAR(5) NOT NULL, qty INT NOT NULL, CONSTRAINT PK_Orders PRIMARY KEY NONCLUSTERED(orderid) ); GO CREATE UNIQUE CLUSTERED INDEX idx_UC_orderdate_orderid ON dbo.Orders(orderdate, orderid); INSERT INTO dbo.Orders(orderid, orderdate, empid, custid, qty) VALUES(30001, '20210802', 3, 'B', 10), (10001, '20211224', 1, 'C', 10), (10005, '20211224', 1, 'A', 30), (40001, '20220109', 4, 'A', 40), (10006, '20220118', 1, 'C', 10), (20001, '20220212', 2, 'B', 20), (40005, '20220212', 4, 'A', 10), (20002, '20220216', 2, 'C', 20), (30003, '20220418', 3, 'B', 15), (30004, '20220418', 3, 'B', 20), (30007, '20220907', 3, 'C', 30); GO -- ranking SELECT orderid, qty, ROW_NUMBER() OVER(ORDER BY qty) AS rownum, RANK() OVER(ORDER BY qty) AS rnk, DENSE_RANK() OVER(ORDER BY qty) AS densernk, NTILE(4) OVER(ORDER BY qty) AS ntile4 FROM dbo.Orders; -- example with partitioning SELECT custid, orderid, qty, ROW_NUMBER() OVER(PARTITION BY custid ORDER BY orderid) AS rownum FROM dbo.Orders ORDER BY custid, orderid; --------------------------------------------------------------------- -- Offset Window Functions --------------------------------------------------------------------- -- LAG and LEAD SELECT custid, orderid, orderdate, qty, LAG(qty) OVER(PARTITION BY custid ORDER BY orderdate, orderid) AS prevqty, LEAD(qty) OVER(PARTITION BY custid ORDER BY orderdate, orderid) AS nextqty FROM dbo.Orders ORDER BY custid, orderdate, orderid; -- FIRST_VALUE and LAST_VALUE SELECT custid, orderid, orderdate, qty, FIRST_VALUE(qty) OVER(PARTITION BY custid ORDER BY orderdate, orderid ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS firstqty, LAST_VALUE(qty) OVER(PARTITION BY custid ORDER BY orderdate, orderid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS lastqty FROM dbo.Orders ORDER BY custid, orderdate, orderid; GO -- NULL treatment clause DROP TABLE IF EXISTS dbo.T1; CREATE TABLE dbo.T1 ( id INT NOT NULL CONSTRAINT PK_T1 PRIMARY KEY, col1 INT NULL, col2 INT NULL ); GO INSERT INTO dbo.T1(id, col1, col2) VALUES ( 2, NULL, 200), ( 3, 10, NULL), ( 5, -1, NULL), ( 7, NULL, 202), (11, NULL, 150), (13, -12, 50), (17, NULL, 180), (19, NULL, 170), (23, 1759, NULL); -- With NULL treatment clause SELECT id, col1, LAST_VALUE(col1) IGNORE NULLS OVER( ORDER BY id ROWS UNBOUNDED PRECEDING ) AS lastknowncol FROM dbo.T1; -- Without NULL treatment clause WITH C AS ( SELECT id, col1, MAX(CASE WHEN col1 IS NOT NULL THEN id END) OVER(ORDER BY id ROWS UNBOUNDED PRECEDING) AS grp FROM dbo.T1 ) SELECT id, col1, MAX(col1) OVER(PARTITION BY grp ORDER BY id ROWS UNBOUNDED PRECEDING) AS lastknowncol1 FROM C; --------------------------------------------------------------------- -- Statistical Window Functions --------------------------------------------------------------------- -- Calculating Median with PERCENTILE_DISC and PERCENTILE_CONT SELECT testid, score, PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY score) OVER(PARTITION BY testid) AS percentiledisc, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY score) OVER(PARTITION BY testid) AS percentilecont FROM Stats.Scores; -- To get once per group use DISTINCT SELECT DISTINCT testid, PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY score) OVER(PARTITION BY testid) AS percentiledisc, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY score) OVER(PARTITION BY testid) AS percentilecont FROM Stats.Scores; -- Approximate percentile functions SELECT testid, APPROX_PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY score) AS percentiledisc, APPROX_PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY score) AS percentilecont FROM Stats.Scores GROUP BY testid; -- If large input, high density, and need 100% accuracy, consider APPLY/OFFSET-FETCH solution -- Cont WITH C AS ( SELECT testid, COUNT(*) AS cnt, (COUNT(*) - 1) / 2 AS ov, 2 - count(*) % 2 AS fv FROM Stats.Scores GROUP BY testid ) SELECT C.testid, AVG( A.score ) AS mediancont FROM C CROSS APPLY ( SELECT S.score FROM Stats.Scores AS S WHERE S.testid = C.testid ORDER BY S.score OFFSET C.OV ROWS FETCH NEXT C.FV ROWS ONLY ) AS A GROUP BY C.testid; -- Disc WITH C AS ( SELECT testid, COUNT(*) AS cnt, (COUNT(*) - 1) / 2 AS ov FROM Stats.Scores GROUP BY testid ) SELECT C.testid, A.score AS mediandisc FROM C CROSS APPLY ( SELECT S.score FROM Stats.Scores AS S WHERE S.testid = C.testid ORDER BY S.score OFFSET C.OV ROWS FETCH NEXT 1 ROW ONLY ) AS A; --------------------------------------------------------------------- -- STRING_AGG Ordered Set Function --------------------------------------------------------------------- -- Built-in STRING_AGG function available in SQL Server 2017+ SELECT custid, STRING_AGG(orderid, ',') WITHIN GROUP(ORDER BY orderdate DESC, orderid DESC) AS custorders FROM Sales.Orders GROUP BY custid; -- Older Custom String Aggregation Solution -- Using FOR XML PATH SELECT custid, STUFF( (SELECT ',' + CAST(orderid AS VARCHAR(MAX)) AS [text()] FROM Sales.Orders AS O WHERE O.custid = C.custid ORDER BY orderid FOR XML PATH('')), 1, 1, '') AS custorders FROM Sales.Customers AS C; --------------------------------------------------------------------- -- WINDOW Clause --------------------------------------------------------------------- -- Sample query without WINDOW clause SELECT empid, ordermonth, val, SUM(val) OVER(PARTITION BY empid ORDER BY ordermonth ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS runsum, MIN(val) OVER(PARTITION BY empid ORDER BY ordermonth ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS runmin, MAX(val) OVER(PARTITION BY empid ORDER BY ordermonth ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS runmax, AVG(val) OVER(PARTITION BY empid ORDER BY ordermonth ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS runavg FROM Sales.EmpOrders; -- Sample query with WINDOW clause SELECT empid, ordermonth, val, SUM(val) OVER W AS runsum, MIN(val) OVER W AS runmin, MAX(val) OVER W AS runmax, AVG(val) OVER W AS runavg FROM Sales.EmpOrders WINDOW W AS (PARTITION BY empid ORDER BY ordermonth ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW); -- Can use one window name in another window name specification SELECT orderid, custid, orderdate, qty, val, ROW_NUMBER() OVER PO AS ordernum, MAX(orderdate) OVER P AS maxorderdate, SUM(qty) OVER POF AS runsumqty, SUM(val) OVER POF AS runsumval FROM Sales.OrderValues WINDOW P AS ( PARTITION BY custid ), PO AS ( P ORDER BY orderdate, orderid ), POF AS ( PO ROWS UNBOUNDED PRECEDING ) ORDER BY custid, orderdate, orderid; --------------------------------------------------------------------- -- Ordering Considerations --------------------------------------------------------------------- -- For more details, see: https://sqlperformance.com/2022/05/t-sql-queries/are-you-sorted-window-ordering USE PerformanceV6; -- https://itziktsql.com/r-downloads -- For demo, limit to serial plans DBCC OPTIMIZER_WHATIF(CPUs, 1); -- Indexing CREATE UNIQUE NONCLUSTERED INDEX idx_nc_cid_od_oid ON dbo.Orders(custid, orderdate, orderid); -- Window function can rely on index order -- No sorting SELECT orderid, orderdate, custid, SUM(orderid) OVER(PARTITION BY custid ORDER BY orderdate, orderid ROWS UNBOUNDED PRECEDING) AS sum1 FROM dbo.Orders; -- Multiple ordering needs -- Four sorts SELECT orderid, orderdate, custid, SUM(orderid) OVER(PARTITION BY custid ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS sum2, SUM(orderid) OVER(PARTITION BY custid ORDER BY orderdate, orderid ROWS UNBOUNDED PRECEDING) AS sum1, SUM(1.0 * orderid) OVER(ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS sum3 FROM dbo.Orders ORDER BY custid, orderid; -- Tip 1: Start with function that can rely on index order -- Tip 2: End with function that has the same ordering needs like presentation ordering -- Two sorts SELECT orderid, orderdate, custid, SUM(orderid) OVER(PARTITION BY custid ORDER BY orderdate, orderid ROWS UNBOUNDED PRECEDING) AS sum1, SUM(1.0 * orderid) OVER(ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS sum3, SUM(orderid) OVER(PARTITION BY custid ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS sum2 FROM dbo.Orders ORDER BY custid, orderid; -- Tip 3: Follow aforementioned tips for first occurrence of each distinct ordering need. -- Subsequent occurrences of same ordering, even if nonadjacent, are identified and computed as part of previous ordering arrangement. -- Still only two sorts SELECT orderid, orderdate, custid, SUM(orderid) OVER(PARTITION BY custid ORDER BY orderdate, orderid ROWS UNBOUNDED PRECEDING) AS sum1, SUM(1.0 * orderid) OVER(ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS sum3, SUM(orderid) OVER(PARTITION BY custid ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS sum2, MAX(orderid) OVER(PARTITION BY custid ORDER BY orderdate, orderid ROWS UNBOUNDED PRECEDING) AS max1, MAX(orderid) OVER(ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS max3, MAX(orderid) OVER(PARTITION BY custid ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS max2, AVG(orderid) OVER(PARTITION BY custid ORDER BY orderdate, orderid ROWS UNBOUNDED PRECEDING) AS avg1, AVG(1.0 * orderid) OVER(ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS avg3, AVG(orderid) OVER(PARTITION BY custid ORDER BY orderid ROWS UNBOUNDED PRECEDING) AS avg2 FROM dbo.Orders ORDER BY custid, orderid; -- Cleanup -- Indexing DROP INDEX IF EXISTS idx_nc_cid_od_oid ON dbo.Orders; -- Reset limit to serial plans DBCC OPTIMIZER_WHATIF(CPUs, 0); USE TSQLV6; --------------------------------------------------------------------- -- Solutions Using Window Functions --------------------------------------------------------------------- --------------------------------------------------------------------- -- Gaps --------------------------------------------------------------------- -- Run the following code to create a table called T1 and fill it with sample data SET NOCOUNT ON; USE tempdb; DROP TABLE IF EXISTS dbo.T1; CREATE TABLE dbo.T1 ( col1 INT NOT NULL CONSTRAINT PK_T1 PRIMARY KEY ); GO INSERT INTO dbo.T1(col1) VALUES(1),(2),(3),(7),(8),(9),(11),(15),(16),(17),(28); -- Write a query that identifies the ranges of missing values in T1.col1. WITH C AS ( SELECT col1 AS cur, LEAD(col1) OVER(ORDER BY col1) AS nxt FROM dbo.T1 ) SELECT cur + 1 AS range_from, nxt - 1 AS range_to FROM C WHERE nxt - cur > 1; --------------------------------------------------------------------- -- Islands --------------------------------------------------------------------- -- Identifying ranges of existing values (islands) WITH C AS ( SELECT col1, col1 - ROW_NUMBER() OVER(ORDER BY col1) AS grp FROM dbo.T1 ) SELECT MIN(col1) AS range_from, MAX(col1) AS range_to FROM C GROUP BY grp; -- If duplicates are possible, use DENSE_RANK instead of ROW_NUMBER WITH C AS ( SELECT col1, col1 - DENSE_RANK() OVER(ORDER BY col1) AS grp FROM dbo.T1 ) SELECT MIN(col1) AS range_from, MAX(col1) AS range_to FROM C GROUP BY grp; -- Islands with date and time data -- The following table holds daily closing stock rates: SET NOCOUNT ON; USE tempdb; DROP TABLE IF EXISTS dbo.Stocks; CREATE TABLE dbo.Stocks ( stockid INT NOT NULL, dt DATE NOT NULL, val INT NOT NULL, CONSTRAINT PK_Stocks PRIMARY KEY(stockid, dt) ); GO INSERT INTO dbo.Stocks VALUES (1, '2023-08-01', 13), (1, '2023-08-02', 14), (1, '2023-08-03', 17), (1, '2023-08-04', 40), (1, '2023-08-05', 45), (1, '2023-08-06', 52), (1, '2023-08-07', 56), (1, '2023-08-08', 60), (1, '2023-08-09', 70), (1, '2023-08-10', 30), (1, '2023-08-11', 29), (1, '2023-08-12', 35), (1, '2023-08-13', 40), (1, '2023-08-14', 45), (1, '2023-08-15', 60), (1, '2023-08-16', 60), (1, '2023-08-17', 55), (1, '2023-08-18', 60), (1, '2023-08-19', 20), (1, '2023-08-20', 15), (1, '2023-08-21', 20), (1, '2023-08-22', 30), (1, '2023-08-23', 40), (1, '2023-08-24', 20), (1, '2023-08-25', 60), (1, '2023-08-26', 80), (1, '2023-08-27', 70), (1, '2023-08-28', 70), (1, '2023-08-29', 40), (1, '2023-08-30', 30), (1, '2023-08-31', 10), (2, '2023-08-01', 3), (2, '2023-08-02', 4), (2, '2023-08-03', 7), (2, '2023-08-04', 30), (2, '2023-08-05', 35), (2, '2023-08-06', 42), (2, '2023-08-07', 46), (2, '2023-08-08', 50), (2, '2023-08-09', 60), (2, '2023-08-10', 20), (2, '2023-08-11', 19), (2, '2023-08-12', 25), (2, '2023-08-13', 30), (2, '2023-08-14', 35), (2, '2023-08-15', 50), (2, '2023-08-16', 50), (2, '2023-08-17', 45), (2, '2023-08-18', 50), (2, '2023-08-19', 10), (2, '2023-08-20', 5), (2, '2023-08-21', 10), (2, '2023-08-22', 20), (2, '2023-08-23', 30), (2, '2023-08-24', 10), (2, '2023-08-25', 50), (2, '2023-08-26', 70), (2, '2023-08-27', 60), (2, '2023-08-28', 60), (2, '2023-08-29', 30), (2, '2023-08-30', 20), (2, '2023-08-31', 1); -- Your task is to identify the ranges of consecutive dates where the stock value -- was >= 50. You can assume that there are no gaps between existing dates. -- On weekends and holidays the last known closing rates are stored. -- For each qualifying period return the stock ID, start and end dates, and maximum stock value within the period. -- Solution USE tempdb; WITH C AS ( SELECT stockid, dt, val, DATEADD(day, -1 * DENSE_RANK() OVER(PARTITION BY stockid ORDER BY dt), dt) AS grp FROM dbo.Stocks WHERE val >= 50 ) SELECT stockid, MIN(dt) AS startdate, MAX(dt) AS enddate, MAX(val) as maxvalue FROM C GROUP BY stockid, grp ORDER BY stockid, startdate; --------------------------------------------------------------------- -- TOP N Per Group --------------------------------------------------------------------- USE TSQLV6; -- POC index CREATE UNIQUE INDEX idx_poc ON Sales.Orders(custid, orderdate DESC, orderid DESC) INCLUDE(empid); -- APPLY, POC + High Density SELECT C.custid, A.* FROM Sales.Customers AS C CROSS APPLY ( SELECT TOP (3) orderid, orderdate, empid FROM Sales.Orders AS O WHERE O.custid = C.custid ORDER BY orderdate DESC, orderid DESC ) AS A; -- ROW_NUMBER, no POC or low density WITH C AS ( SELECT ROW_NUMBER() OVER( PARTITION BY custid ORDER BY orderdate DESC, orderid DESC) AS rownum, orderid, orderdate, custid, empid FROM Sales.Orders ) SELECT custid, orderdate, orderid, empid FROM C WHERE rownum <= 3; -- Remove index DROP INDEX Sales.Orders.idx_POC; --------------------------------------------------------------------- -- Max Concurrent Intervals --------------------------------------------------------------------- -- Creating and Populating Sessions SET NOCOUNT ON; USE tempdb; DROP TABLE IF EXISTS dbo.Sessions; CREATE TABLE dbo.Sessions ( keycol INT NOT NULL, app VARCHAR(10) NOT NULL, starttime DATETIME2 NOT NULL, endtime DATETIME2 NOT NULL, CONSTRAINT PK_Sessions PRIMARY KEY(keycol), CHECK(endtime > starttime) ); GO CREATE UNIQUE INDEX idx_start ON dbo.Sessions(app, starttime, keycol) include(endtime); CREATE UNIQUE INDEX idx_end ON dbo.Sessions(app, endtime, keycol) include(starttime); -- Sample data INSERT INTO dbo.Sessions(keycol, app, starttime, endtime) VALUES (2, 'app1', '20221101 08:30', '20221101 10:30'), (3, 'app1', '20221101 08:30', '20221101 08:45'), (5, 'app1', '20221101 09:00', '20221101 09:30'), (7, 'app1', '20221101 09:15', '20221101 10:30'), (11, 'app1', '20221101 09:15', '20221101 09:30'), (13, 'app1', '20221101 10:30', '20221101 14:30'), (17, 'app1', '20221101 10:45', '20221101 11:30'), (19, 'app1', '20221101 11:00', '20221101 12:30'), (23, 'app2', '20221101 08:30', '20221101 08:45'), (29, 'app2', '20221101 09:00', '20221101 09:30'), (31, 'app2', '20221101 11:45', '20221101 12:00'), (37, 'app2', '20221101 12:30', '20221101 14:00'), (41, 'app2', '20221101 12:45', '20221101 13:30'), (43, 'app2', '20221101 13:00', '20221101 14:00'), (47, 'app2', '20221101 14:00', '20221101 16:30'), (53, 'app2', '20221101 15:30', '20221101 17:00'); GO -- Task: compute max concurrent intervals per app -- Solution WITH C1 AS ( SELECT keycol, app, starttime AS ts, +1 AS type FROM dbo.Sessions UNION ALL SELECT keycol, app, endtime AS ts, -1 AS type FROM dbo.Sessions ), C2 AS ( SELECT *, SUM(type) OVER(PARTITION BY app ORDER BY ts, type, keycol ROWS UNBOUNDED PRECEDING) AS cnt FROM C1 ) SELECT app, MAX(cnt) AS mx FROM C2 GROUP BY app; --------------------------------------------------------------------- -- Packing Intervals --------------------------------------------------------------------- -- Task: pack groups of intersecting intervals per app -- Solution WITH C1 AS ( SELECT keycol, app, starttime AS ts, +1 AS type FROM dbo.Sessions UNION ALL SELECT keycol, app, endtime AS ts, -1 AS type FROM dbo.Sessions ), C2 AS ( SELECT *, SUM(type) OVER(PARTITION BY app ORDER BY ts, type DESC, keycol ROWS UNBOUNDED PRECEDING) AS cnt FROM C1 ), C3 AS ( SELECT *, (ROW_NUMBER() OVER(PARTITION BY app ORDER BY ts, type DESC, keycol) - 1) / 2 + 1 AS p FROM C2 WHERE (type = 1 AND cnt = 1) OR cnt = 0 ) SELECT app, MIN(ts) AS starttime, MAX(ts) AS endtime FROM C3 GROUP BY app, p; --------------------------------------------------------------------- -- Time Series --------------------------------------------------------------------- --------------------------------------------------------------------- -- DATE_BUCKET and DATETRUNC function --------------------------------------------------------------------- -- Examples with 2-month bucket width and custom origin DECLARE @timestamp AS DATETIME2 = '20210510 06:30:00', @bucketwidth AS INT = 2, @origin AS DATETIME2 = '20210115 00:00:00'; SELECT DATE_BUCKET(month, @bucketwidth, @timestamp, @origin); GO -- Output: 2021-03-15 00:00:00.0000000 -- DATETRUNC is like a simple version of DATE_BUCKET -- Following return the same result SELECT DATE_BUCKET(month, 1, SYSDATETIME()) AS startofmonth1, DATETRUNC(month, SYSDATETIME()) AS startofmonth2; --------------------------------------------------------------------- -- Optimization of DATE_BUCKET and DATETRUNC can rely on index order --------------------------------------------------------------------- --------------------------------------------------------------------- -- Optimization of Traditional Time-Based Grouping --------------------------------------------------------------------- -- Relies on index order -- Uses Stream Aggregate USE TSQLV6; SELECT orderdate, COUNT(*) AS numorders FROM Sales.Orders GROUP BY orderdate; -- Uses Hash Match aggregate SELECT orderyear, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate))) AS D(orderyear) GROUP BY orderyear; -- Force order group or add presentation ORDER BY -- Doesn't rely on index order -- Results in Sort operator in plan SELECT orderyear, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate))) AS D(orderyear) GROUP BY orderyear OPTION(ORDER GROUP); SELECT orderyear, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate))) AS D(orderyear) GROUP BY orderyear ORDER BY orderyear; -- Same applies to grouping by year, month SELECT orderyear, ordermonth, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate), MONTH(orderdate))) AS D(orderyear, ordermonth) GROUP BY orderyear, ordermonth; -- Same applies to grouping by week SELECT startofweek, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATEADD(week, DATEDIFF(week, CAST('19000107' AS DATE), orderdate), CAST('19000107' AS DATE)))) AS D(startofweek) GROUP BY startofweek; --------------------------------------------------------------------- -- The Old and Ugly Workaround --------------------------------------------------------------------- -- Using indexed computed column ALTER TABLE Sales.Orders ADD corderyear AS YEAR(orderdate); CREATE NONCLUSTERED INDEX idx_nc_corderyear ON Sales.Orders(corderyear); -- Relies on index SELECT orderyear, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate))) AS D(orderyear) GROUP BY orderyear; -- Cleanup DROP INDEX idx_nc_corderyear ON Sales.Orders; ALTER TABLE Sales.Orders DROP COLUMN corderyear; --------------------------------------------------------------------- -- The New and Elegant Workaround Using the DATE_BUCKET Function --------------------------------------------------------------------- -- Example using DATE_BUCKET SELECT DATE_BUCKET( year, 1, CAST('20220718' AS DATE) ) AS startofyear; -- Instead of SELECT orderyear, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate))) AS D(orderyear) GROUP BY orderyear; -- Use SELECT YEAR(yearbucket) AS orderyear, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATE_BUCKET(year, 1, orderdate))) AS D(yearbucket) GROUP BY yearbucket; -- Instead of SELECT orderyear, ordermonth, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate), MONTH(orderdate))) AS D(orderyear, ordermonth) GROUP BY orderyear, ordermonth; -- Use SELECT YEAR(yearmonthbucket) AS orderyear, MONTH(yearmonthbucket) AS ordermonth, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATE_BUCKET(month, 1, orderdate))) AS D(yearmonthbucket) GROUP BY yearmonthbucket; -- Instead of SELECT startofweek, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATEADD(week, DATEDIFF(week, CAST('19000107' AS DATE), orderdate), CAST('19000107' AS DATE)))) AS D(startofweek) GROUP BY startofweek; -- Use SELECT startofweek, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATE_BUCKET(week, 1, orderdate, CAST('19000107' AS DATE)))) AS D(startofweek) GROUP BY startofweek; --------------------------------------------------------------------- -- What about the DATETRUNC function? --------------------------------------------------------------------- -- Example using DATETRUNC SELECT DATETRUNC( year, CAST('20220718' AS DATE) ) AS startofyear; SELECT DATE_BUCKET( year, 1, CAST('20220718' AS DATE) ) AS startofyear; -- Instead of SELECT orderyear, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate))) AS D(orderyear) GROUP BY orderyear; -- Could use SELECT YEAR(startofyear) AS orderyear, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATETRUNC(year, orderdate))) AS D(startofyear) GROUP BY startofyear; -- Instead of SELECT orderyear, ordermonth, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(YEAR(orderdate), MONTH(orderdate))) AS D(orderyear, ordermonth) GROUP BY orderyear, ordermonth; -- Could use SELECT YEAR(startofmonth) AS orderyear, MONTH(startofmonth) AS ordermonth, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATETRUNC(month, orderdate))) AS D(startofmonth) GROUP BY startofmonth; -- Instead of SELECT startofweek, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATEADD(week, DATEDIFF(week, CAST('19000107' AS DATE), orderdate), CAST('19000107' AS DATE)))) AS D(startofweek) GROUP BY startofweek; -- Could use SET DATEFIRST 7; SELECT startofweek, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATETRUNC(week, orderdate))) AS D(startofweek) GROUP BY startofweek; --------------------------------------------------------------------- -- Querying Time Series --------------------------------------------------------------------- -- Group by fiscal year, which starts July 1st SELECT fiscalyearstart, COUNT(*) AS numorders FROM Sales.Orders CROSS APPLY (VALUES(DATE_BUCKET(year, 1, orderdate, CAST('19000701' AS DATE)))) AS A(fiscalyearstart) GROUP BY fiscalyearstart; -- Sample data for sensors USE TSQLV6; DROP TABLE IF EXISTS dbo.SensorMeasurements, dbo.Sensors; CREATE TABLE dbo.Sensors ( sensorid INT NOT NULL CONSTRAINT PK_Sensors PRIMARY KEY, description VARCHAR(50) NOT NULL ); INSERT INTO dbo.Sensors(sensorid, description) VALUES (1, 'Restaurant Fancy Schmancy beer fridge'), (2, 'Restaurant Fancy Schmancy wine cellar'); CREATE TABLE dbo.SensorMeasurements ( sensorid INT NOT NULL CONSTRAINT FK_SensorMeasurements_Sensors REFERENCES dbo.Sensors, ts DATETIME2(0) NOT NULL, temperature NUMERIC(5, 2) NOT NULL, -- Fahrenheit humidity NUMERIC(5, 2) NOT NULL, -- percent CONSTRAINT PK_SensorMeasurements PRIMARY KEY(sensorid, ts) ); INSERT INTO dbo.SensorMeasurements(sensorid, ts, temperature, humidity) VALUES (1, '20220609 06:00:03', 39.16, 86.28), (1, '20220609 09:59:57', 39.72, 83.44), (1, '20220609 13:59:59', 38.93, 84.33), (1, '20220609 18:00:00', 39.42, 79.66), (1, '20220609 22:00:01', 40.08, 94.44), (1, '20220610 01:59:57', 41.26, 90.42), (1, '20220610 05:59:59', 40.89, 72.94), (1, '20220610 09:59:58', 40.03, 84.48), (1, '20220610 14:00:03', 41.23, 93.47), (1, '20220610 17:59:59', 39.32, 88.09), (1, '20220610 21:59:57', 41.19, 92.89), (1, '20220611 01:59:58', 40.88, 89.23), (1, '20220611 06:00:03', 41.14, 82.27), (1, '20220611 10:00:00', 39.20, 86.00), (1, '20220611 14:00:02', 39.41, 74.92), (1, '20220611 18:00:02', 41.12, 87.37), (1, '20220611 21:59:59', 40.67, 84.63), (1, '20220612 02:00:02', 41.15, 86.16), (1, '20220612 06:00:02', 39.23, 74.59), (1, '20220612 10:00:00', 41.40, 86.80), (1, '20220612 14:00:00', 41.20, 79.97), (1, '20220612 18:00:03', 40.11, 92.84), (1, '20220612 22:00:03', 40.87, 94.23), (1, '20220613 02:00:00', 39.03, 92.44), (1, '20220613 05:59:57', 40.19, 94.72), (1, '20220613 10:00:02', 39.55, 87.77), (1, '20220613 14:00:02', 38.94, 89.06), (1, '20220613 18:00:03', 40.88, 73.81), (1, '20220613 21:59:57', 41.24, 86.56), (1, '20220614 02:00:00', 40.25, 76.64), (1, '20220614 06:00:01', 40.73, 90.66), (1, '20220614 10:00:03', 40.82, 92.76), (1, '20220614 13:59:58', 39.70, 73.74), (1, '20220614 17:59:57', 39.65, 89.38), (1, '20220614 22:00:02', 39.47, 73.36), (1, '20220615 02:00:03', 39.14, 77.89), (1, '20220615 06:00:00', 40.82, 86.84), (1, '20220615 09:59:57', 39.91, 90.09), (1, '20220615 13:59:57', 41.34, 82.88), (1, '20220615 18:00:01', 40.51, 86.58), (1, '20220615 22:00:00', 41.23, 83.85), (2, '20220609 06:00:01', 54.95, 75.39), (2, '20220609 10:00:03', 56.94, 71.34), (2, '20220609 13:59:59', 54.07, 68.09), (2, '20220609 18:00:02', 54.05, 65.50), (2, '20220609 22:00:00', 53.37, 66.28), (2, '20220610 01:59:58', 56.33, 79.90), (2, '20220610 05:59:58', 57.00, 65.88), (2, '20220610 10:00:02', 54.64, 61.10), (2, '20220610 14:00:01', 53.48, 69.76), (2, '20220610 17:59:57', 55.15, 65.85), (2, '20220610 22:00:02', 54.48, 75.90), (2, '20220611 02:00:00', 54.55, 62.28), (2, '20220611 06:00:01', 54.56, 66.36), (2, '20220611 09:59:58', 55.92, 77.53), (2, '20220611 14:00:02', 55.89, 68.57), (2, '20220611 18:00:01', 54.82, 62.04), (2, '20220611 22:00:01', 55.58, 76.20), (2, '20220613 01:59:58', 56.29, 62.33), (2, '20220615 10:00:03', 53.24, 70.67), (2, '20220615 13:59:59', 55.93, 77.60), (2, '20220615 18:00:01', 54.05, 66.56), (2, '20220615 21:59:58', 54.66, 61.13); SELECT * FROM dbo.Sensors; SELECT * FROM dbo.SensorMeasurements; GO -- Grouping and aggregating data by bucket DECLARE @bucketwidth AS INT = 12, @origin AS DATETIME2(0) = '19000101 00:00:00'; WITH C AS ( SELECT sensorid, ts, temperature, DATE_BUCKET(hour, @bucketwidth, ts, @origin) AS bucketstart FROM dbo.SensorMeasurements ) SELECT sensorid, bucketstart, DATEADD(hour, @bucketwidth, bucketstart) AS bucketend, MIN(temperature) AS mintemp, MAX(temperature) AS maxtemp, AVG(temperature) AS avgtemp FROM C GROUP BY sensorid, bucketstart ORDER BY sensorid, bucketstart; GO --------------------------------------------------------------------- -- Gap filling --------------------------------------------------------------------- -- Produce all possible bucket start times with GENERATE_SERIES DECLARE @bucketwidth AS INT = 12, @startperiod AS DATETIME2(0) = '20220609 00:00:00', @endperiod AS DATETIME2(0) = '20220615 12:00:00'; SELECT DATEADD(hour, value * @bucketwidth, @startperiod) AS ts FROM GENERATE_SERIES(0, DATEDIFF(hour, @startperiod, @endperiod) / @bucketwidth) AS N; GO -- Gap filling solution using GENERATE_SERIES and NULL treatment clause DECLARE @bucketwidth AS INT = 12, @startperiod AS DATETIME2(0) = '20220609 00:00:00', @endperiod AS DATETIME2(0) = '20220615 12:00:00'; WITH TS AS ( SELECT DATEADD(hour, value * @bucketwidth, @startperiod) AS ts FROM GENERATE_SERIES(0, DATEDIFF(hour, @startperiod, @endperiod) / @bucketwidth) AS N ), C1 AS ( SELECT sensorid, ts, temperature, DATE_BUCKET(hour, @bucketwidth, ts, @startperiod) AS bucketstart FROM dbo.SensorMeasurements ), C2 AS ( SELECT sensorid, bucketstart, MIN(temperature) AS mintemp, MAX(temperature) AS maxtemp, AVG(temperature) AS avgtemp FROM C1 GROUP BY sensorid, bucketstart ) SELECT S.sensorid, TS.ts AS bucketstart, DATEADD(hour, @bucketwidth, TS.ts) AS bucketend, -- mintemp, maxtemp, avgtemp -- uncomment to see NULLs at gaps LAST_VALUE(mintemp) IGNORE NULLS OVER W AS mintemp, LAST_VALUE(maxtemp) IGNORE NULLS OVER W AS maxtemp, LAST_VALUE(avgtemp) IGNORE NULLS OVER W AS avgtemp FROM dbo.Sensors AS S CROSS JOIN TS LEFT OUTER JOIN C2 ON S.sensorid = C2.sensorid AND TS.ts = C2.bucketstart WINDOW W AS (PARTITION BY S.sensorid ORDER BY TS.ts ROWS UNBOUNDED PRECEDING) ORDER BY sensorid, bucketstart; GO --------------------------------------------------------------------- -- Grouping Sets --------------------------------------------------------------------- -- Code to Create and Populate the Orders Table SET NOCOUNT ON; USE tempdb; DROP TABLE IF EXISTS dbo.Orders; CREATE TABLE dbo.Orders ( orderid INT NOT NULL, orderdate DATETIME NOT NULL, empid INT NOT NULL, custid VARCHAR(5) NOT NULL, qty INT NOT NULL, CONSTRAINT PK_Orders PRIMARY KEY(orderid) ); GO INSERT INTO dbo.Orders (orderid, orderdate, empid, custid, qty) VALUES (30001, '20200802', 3, 'A', 10), (10001, '20201224', 1, 'A', 12), (10005, '20201224', 1, 'B', 20), (40001, '20210109', 4, 'A', 40), (10006, '20210118', 1, 'C', 14), (20001, '20210212', 2, 'B', 12), (40005, '20220212', 4, 'A', 10), (20002, '20220216', 2, 'C', 20), (30003, '20220418', 3, 'B', 15), (30004, '20200418', 3, 'C', 22), (30007, '20200907', 3, 'D', 30); --------------------------------------------------------------------- -- GROUPING SETS Subclause --------------------------------------------------------------------- SELECT custid, empid, YEAR(orderdate) AS orderyear, SUM(qty) AS qty FROM dbo.Orders GROUP BY GROUPING SETS ( ( custid, empid, YEAR(orderdate) ), ( custid, YEAR(orderdate) ), ( empid, YEAR(orderdate) ), () ); -- Logically equivalent to unifying multiple aggregate queries: SELECT custid, empid, YEAR(orderdate) AS orderyear, SUM(qty) AS qty FROM dbo.Orders GROUP BY custid, empid, YEAR(orderdate) UNION ALL SELECT custid, NULL AS empid, YEAR(orderdate) AS orderyear, SUM(qty) AS qty FROM dbo.Orders GROUP BY custid, YEAR(orderdate) UNION ALL SELECT NULL AS custid, empid, YEAR(orderdate) AS orderyear, SUM(qty) AS qty FROM dbo.Orders GROUP BY empid, YEAR(orderdate) UNION ALL SELECT NULL AS custid, NULL AS empid, NULL AS orderyear, SUM(qty) AS qty FROM dbo.Orders; --------------------------------------------------------------------- -- CUBE Subclause --------------------------------------------------------------------- SELECT custid, empid, SUM(qty) AS qty FROM dbo.Orders GROUP BY CUBE(custid, empid); -- Equivalent to: SELECT custid, empid, SUM(qty) AS qty FROM dbo.Orders GROUP BY GROUPING SETS ( ( custid, empid ), ( custid ), ( empid ), () ); --------------------------------------------------------------------- -- ROLLUP Subclause --------------------------------------------------------------------- -- Optimization minimizes sorts -- Following involves one sort SELECT YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, DAY(orderdate) AS orderday, SUM(qty) AS qty FROM dbo.Orders GROUP BY ROLLUP(YEAR(orderdate), MONTH(orderdate), DAY(orderdate)); -- Following index doesn't eliminate sort CREATE INDEX idx_nc_od_i_qty ON dbo.Orders(orderdate) INCLUDE(qty); -- To eliminate sort, can use computed column and index -- Add computed columns and index ALTER TABLE dbo.Orders ADD corderyear AS YEAR(orderdate), cordermonth AS MONTH(orderdate), corderday AS DAY(orderdate); CREATE NONCLUSTERED INDEX idx_nc_corderymd_i_qty ON dbo.Orders(corderyear, cordermonth, corderday) INCLUDE(qty); -- Equivalent to: SELECT YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, DAY(orderdate) AS orderday, SUM(qty) AS qty FROM dbo.Orders GROUP BY GROUPING SETS ( ( YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ), ( YEAR(orderdate), MONTH(orderdate) ), ( YEAR(orderdate) ), () ); --------------------------------------------------------------------- -- Grouping Sets Algebra --------------------------------------------------------------------- -- Multiplication SELECT custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, DAY(orderdate) AS orderday, SUM(qty) AS qty FROM dbo.Orders GROUP BY CUBE(custid, empid), ROLLUP(YEAR(orderdate), MONTH(orderdate), DAY(orderdate)); -- Equivalent to: SELECT custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, SUM(qty) AS qty FROM dbo.Orders GROUP BY GROUPING SETS ( ( custid, empid ), ( custid ), ( empid ), () ), GROUPING SETS ( ( YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ), ( YEAR(orderdate), MONTH(orderdate) ), ( YEAR(orderdate) ), () ); -- Equivalent to: SELECT custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, SUM(qty) AS qty FROM dbo.Orders GROUP BY GROUPING SETS ( ( custid, empid, YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ), ( custid, empid, YEAR(orderdate), MONTH(orderdate) ), ( custid, empid, YEAR(orderdate) ), ( custid, empid ), ( custid, YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ), ( custid, YEAR(orderdate), MONTH(orderdate) ), ( custid, YEAR(orderdate) ), ( custid ), ( empid, YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ), ( empid, YEAR(orderdate), MONTH(orderdate) ), ( empid, YEAR(orderdate) ), ( empid ), ( YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ), ( YEAR(orderdate), MONTH(orderdate) ), ( YEAR(orderdate) ), () ); -- Division (pulling out common elements) SELECT custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, SUM(qty) AS qty FROM dbo.Orders GROUP BY GROUPING SETS ( ( custid, empid, YEAR(orderdate), MONTH(orderdate) ), ( custid, empid, YEAR(orderdate) ), ( custid, YEAR(orderdate), MONTH(orderdate) ), ( custid, YEAR(orderdate) ), ( empid, YEAR(orderdate), MONTH(orderdate) ), ( empid, YEAR(orderdate) ) ); -- Equivalent to: SELECT custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, SUM(qty) AS qty FROM dbo.Orders GROUP BY YEAR(orderdate), -- same as GROUPING SETS( (YEAR(orderdate) ) GROUPING SETS ( ( custid, empid, MONTH(orderdate) ), ( custid, empid ), ( custid, MONTH(orderdate) ), ( custid ), ( empid, MONTH(orderdate) ), ( empid ) ); -- Equivalent to: SELECT custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, SUM(qty) AS qty FROM dbo.Orders GROUP BY YEAR(orderdate), GROUPING SETS ( ( custid, empid ), ( custid ), ( empid ) ), GROUPING SETS ( ( MONTH(orderdate) ), () ); -- Addition SELECT custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, SUM(qty) AS qty FROM dbo.Orders GROUP BY GROUPING SETS ( ( custid, empid ), ( custid ), ( empid ), ROLLUP(YEAR(orderdate), MONTH(orderdate), DAY(orderdate)) ); -- Equivalent to: SELECT custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, SUM(qty) AS qty FROM dbo.Orders GROUP BY GROUPING SETS ( ( custid, empid ), ( custid ), ( empid ), ( YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ), ( YEAR(orderdate), MONTH(orderdate) ), ( YEAR(orderdate) ), () ); --------------------------------------------------------------------- -- GROUPING_ID Function --------------------------------------------------------------------- SELECT GROUPING_ID( custid, empid, YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ) AS grp_id, custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, DAY(orderdate) AS orderday, SUM(qty) AS qty FROM dbo.Orders GROUP BY CUBE(custid, empid), ROLLUP(YEAR(orderdate), MONTH(orderdate), DAY(orderdate)); --------------------------------------------------------------------- -- Materialize grouping sets --------------------------------------------------------------------- SET NOCOUNT ON; USE tempdb; GO DROP TABLE IF EXISTS dbo.MyGroupingSets; GO SELECT GROUPING_ID( custid, empid, YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ) AS grp_id, custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, DAY(orderdate) AS orderday, SUM(qty) AS qty INTO dbo.MyGroupingSets FROM dbo.Orders GROUP BY CUBE(custid, empid), ROLLUP(YEAR(orderdate), MONTH(orderdate), DAY(orderdate)); CREATE UNIQUE CLUSTERED INDEX idx_cl_grp_id_grp_attributes ON dbo.MyGroupingSets(grp_id, custid, empid, orderyear, ordermonth, orderday); GO -- Inline SELECT * FROM dbo.MyGroupingSets WHERE grp_id = 9; GO -- New Order Activity added in April 19, 2019 INSERT INTO dbo.Orders (orderid, orderdate, empid, custid, qty) VALUES (50001, '20220419', 1, 'A', 10), (50002, '20220419', 1, 'B', 30), (50003, '20220419', 2, 'A', 20), (50004, '20220419', 2, 'B', 5), (50005, '20220419', 3, 'A', 15) GO -- Incremental Update WITH LastDay AS ( SELECT GROUPING_ID( custid, empid, YEAR(orderdate), MONTH(orderdate), DAY(orderdate) ) AS grp_id, custid, empid, YEAR(orderdate) AS orderyear, MONTH(orderdate) AS ordermonth, DAY(orderdate) AS orderday, SUM(qty) AS qty FROM dbo.Orders WHERE orderdate = '20220419' GROUP BY CUBE(custid, empid), ROLLUP(YEAR(orderdate), MONTH(orderdate), DAY(orderdate)) ) MERGE INTO dbo.MyGroupingSets AS TGT USING LastDay AS SRC ON (TGT.grp_id = SRC.grp_id) AND (TGT.orderyear = SRC.orderyear OR (TGT.orderyear IS NULL AND SRC.orderyear IS NULL)) AND (TGT.ordermonth = SRC.ordermonth OR (TGT.ordermonth IS NULL AND SRC.ordermonth IS NULL)) AND (TGT.orderday = SRC.orderday OR (TGT.orderday IS NULL AND SRC.orderday IS NULL)) AND (TGT.custid = SRC.custid OR (TGT.custid IS NULL AND SRC.custid IS NULL)) AND (TGT.empid = SRC.empid OR (TGT.empid IS NULL AND SRC.empid IS NULL)) WHEN MATCHED THEN UPDATE SET TGT.qty += SRC.qty WHEN NOT MATCHED THEN INSERT (grp_id, custid, empid, orderyear, ordermonth, orderday, qty) VALUES (SRC.grp_id, SRC.custid, SRC.empid, SRC.orderyear, SRC.ordermonth, SRC.orderday, SRC.qty); -- Cleanup DROP TABLE IF EXISTS dbo.Orders; --------------------------------------------------------------------- -- PIVOT & UNPIVOT Cheat Sheets --------------------------------------------------------------------- --------------------------------------------------------------------- -- PIVOT Cheat Sheet --------------------------------------------------------------------- USE TSQLV6; /* SELECT * FROM ( SELECT , , FROM ) AS D PIVOT( aggregate_funcion( ) FOR IN ( ) ) AS P; */ SELECT * FROM ( SELECT custid, YEAR( orderdate ) AS orderyear, val FROM Sales.OrderValues ) AS D PIVOT( SUM( val ) FOR orderyear IN( [2020],[2021],[2022] ) ) AS P; --------------------------------------------------------------------- -- UNPIVOT Cheat Sheet --------------------------------------------------------------------- -- Sample Data DROP TABLE IF EXISTS dbo.PvtOrders; SELECT * INTO dbo.PvtOrders FROM (SELECT custid, YEAR( orderdate ) AS orderyear, val FROM Sales.OrderValues) AS D PIVOT( SUM( val ) FOR orderyear IN( [2020],[2021],[2022] ) ) AS P; SELECT * FROM dbo.PvtOrders; GO /* SELECT FROM UNPIVOT( FOR IN ( ) ) FOR IN ( ) ) AS U; */ SELECT custid, orderyear, val FROM dbo.PvtOrders UNPIVOT( val FOR orderyear IN ( [2020],[2021],[2022] ) ) AS U;