--------------------------------------------------------------------- -- Hone Your T-SQL Querying Skills - Write Robust and Efficient Solutions -- © Itzik Ben-Gan --------------------------------------------------------------------- -- Sample databases TSQLV6: http://tsql.Lucient.com/SampleDatabases/TSQLV6.zip -- Sample databases PerformanceV5: http://tsql.Lucient.com/SampleDatabases/PerformanceV5.zip --------------------------------------------------------------------- -- Logical Query Processing --------------------------------------------------------------------- -- Logical Query Processing Example -- Create and populate tables dbo.Customers and dbo.Orders SET NOCOUNT ON; USE tempdb; IF OBJECT_ID(N'dbo.Orders', N'U') IS NOT NULL DROP TABLE dbo.Orders; IF OBJECT_ID(N'dbo.Customers', N'U') IS NOT NULL DROP TABLE dbo.Customers; CREATE TABLE dbo.Customers ( custid CHAR(5) NOT NULL, city VARCHAR(10) NOT NULL, CONSTRAINT PK_Customers PRIMARY KEY(custid) ); CREATE TABLE dbo.Orders ( orderid INT NOT NULL, custid CHAR(5) NULL, CONSTRAINT PK_Orders PRIMARY KEY(orderid), CONSTRAINT PK_Orders_Customers FOREIGN KEY(custid) REFERENCES dbo.Customers(custid) ); GO INSERT INTO dbo.Customers(custid, city) VALUES('FISSA', 'Madrid'); INSERT INTO dbo.Customers(custid, city) VALUES('FRNDO', 'Madrid'); INSERT INTO dbo.Customers(custid, city) VALUES('KRLOS', 'Madrid'); INSERT INTO dbo.Customers(custid, city) VALUES('MRPHS', 'Zion'); INSERT INTO dbo.Orders(orderid, custid) VALUES(1, 'FRNDO'); INSERT INTO dbo.Orders(orderid, custid) VALUES(2, 'FRNDO'); INSERT INTO dbo.Orders(orderid, custid) VALUES(3, 'KRLOS'); INSERT INTO dbo.Orders(orderid, custid) VALUES(4, 'KRLOS'); INSERT INTO dbo.Orders(orderid, custid) VALUES(5, 'KRLOS'); INSERT INTO dbo.Orders(orderid, custid) VALUES(6, 'MRPHS'); INSERT INTO dbo.Orders(orderid, custid) VALUES(7, NULL); -- Return the customer ID and number of orders for customers -- from Madrid who placed fewer than 3 orders. -- Sort the result by the number of orders. SELECT C.custid, COUNT(O.orderid) AS numorders FROM dbo.Customers AS C LEFT OUTER JOIN dbo.Orders AS O ON C.custid = O.custid WHERE C.city = 'Madrid' GROUP BY C.custid HAVING COUNT(O.orderid) < 3 ORDER BY numorders; -- To reuse column aliases USE TSQLV6; -- Can use table expressions (CTEs, derived tables) WITH C1 AS ( SELECT orderid, orderdate, YEAR(orderdate) AS orderyear FROM Sales.Orders ), C2 AS ( SELECT orderid, orderdate, orderyear, DATEFROMPARTS(orderyear, 12, 31) AS endofyear FROM C1 ) SELECT orderid, orderyear FROM C2 WHERE orderdate = endofyear; -- Can use CROSS APPLY and VALUES SELECT orderid, orderyear FROM Sales.Orders CROSS APPLY ( VALUES( YEAR(orderdate) ) ) AS A1(orderyear) CROSS APPLY ( VALUES( DATEFROMPARTS(orderyear, 12, 31) ) ) AS A2(endofyear) WHERE orderdate = endofyear; GO -- WHERE optimization USE TSQLV6; DECLARE @dt AS DATE = '20210108'; -- also try with NULL input -- Not SARG SELECT orderid, shippeddate FROM Sales.Orders WHERE ISNULL(shippeddate, '99991231') = ISNULL(@dt, '99991231'); -- SARG SELECT orderid, shippeddate FROM Sales.Orders WHERE shippeddate = @dt OR (shippeddate IS NULL AND @dt IS NULL); SELECT orderid, shippeddate FROM Sales.Orders WHERE EXISTS(SELECT shippeddate INTERSECT SELECT @dt); SELECT orderid, shippeddate FROM Sales.Orders WHERE shippeddate IS NOT DISTINCT FROM @dt; -- SQL Server 2022 --------------------------------------------------------------------- -- Guidance for learning about query tuning --------------------------------------------------------------------- --------------------------------------------------------------------- -- Example with grouping and aggregation --------------------------------------------------------------------- -- More details: -- https://sqlperformance.com/2018/04/sql-plan/grouping-and-aggregating-part-1 -- https://sqlperformance.com/2018/05/sql-plan/grouping-and-aggregating-part-2 -- https://sqlperformance.com/2018/06/sql-plan/grouping-and-aggregating-part-3 -- https://sqlperformance.com/2018/07/sql-plan/grouping-and-aggregating-part-4 -- https://sqlperformance.com/2018/08/sql-plan/grouping-and-aggregating-part-5 USE PerformanceV5; SELECT empid, MAX(orderdate) AS maxod FROM dbo.Orders GROUP BY empid; --------------------------------------------------------------------- -- Preordered Stream Aggregate --------------------------------------------------------------------- CREATE INDEX idx_empid ON dbo.Orders(empid, orderdate); SELECT empid, MAX(orderdate) AS maxod FROM dbo.Orders GROUP BY empid; DROP INDEX IF EXISTS idx_empid ON dbo.Orders; --------------------------------------------------------------------- -- Sort + Stream Aggregate --------------------------------------------------------------------- SELECT empid, MAX(orderdate) AS maxod FROM (SELECT TOP (100) * FROM dbo.Orders) AS D GROUP BY empid; --------------------------------------------------------------------- -- Sidebar: grouping reliance on index order --------------------------------------------------------------------- -- https://sqlperformance.com/author/itzikbengan --------------------------------------------------------------------- -- Hash Aggregate --------------------------------------------------------------------- SELECT empid, MAX(orderdate) AS maxod FROM (SELECT TOP (10000) * FROM dbo.Orders) AS D GROUP BY empid; --------------------------------------------------------------------- -- Optimization threshold --------------------------------------------------------------------- SELECT empid, MAX(orderdate) AS maxod FROM (SELECT TOP (1002) * FROM dbo.Orders) AS D GROUP BY empid; SELECT empid, MAX(orderdate) AS maxod FROM (SELECT TOP (1003) * FROM dbo.Orders) AS D GROUP BY empid; --------------------------------------------------------------------- -- Query rewrites --------------------------------------------------------------------- -- Supporting index CREATE INDEX idx_sid_od ON dbo.Orders(shipperid, orderdate); -- Scan SELECT shipperid, MAX(orderdate) AS maxod FROM dbo.Orders GROUP BY shipperid; -- Seeks SELECT S.shipperid, O.maxod FROM dbo.Shippers AS S CROSS APPLY ( SELECT TOP (1) O.orderdate FROM dbo.Orders AS O WHERE O.shipperid = S.shipperid ORDER BY O.orderdate DESC ) AS O(maxod); -- Cleanup DROP INDEX idx_sid_od ON dbo.Orders; --------------------------------------------------------------------- -- Common querying tasks and solutions (as time permits) --------------------------------------------------------------------- --------------------------------------------------------------------- -- Handling intervals --------------------------------------------------------------------- --------------------------------------------------------------------- -- Interval Intersection --------------------------------------------------------------------- USE tempdb; DROP TABLE IF EXISTS dbo.Sessions; CREATE TABLE dbo.Sessions ( id INT NOT NULL IDENTITY(1, 1), username VARCHAR(10) NOT NULL, starttime DATETIME2 NOT NULL, endtime DATETIME2 NOT NULL, CONSTRAINT PK_Sessions PRIMARY KEY(id), CONSTRAINT CHK_endtime_gteq_starttime CHECK (endtime >= starttime) ); GO CREATE INDEX idx_nc_usr_st_ed ON dbo.Sessions(username, starttime, endtime); INSERT INTO dbo.Sessions(username, starttime, endtime) VALUES ('User1', '20221101 08:00', '20221101 08:30'), ('User1', '20221101 08:30', '20221101 09:00'), ('User1', '20221101 09:00', '20221101 09:30'), ('User1', '20221101 10:00', '20221101 11:00'), ('User1', '20221101 10:30', '20221101 12:00'), ('User1', '20221101 11:30', '20221101 12:30'), ('User2', '20221101 08:00', '20221101 10:30'), ('User2', '20221101 08:30', '20221101 10:00'), ('User2', '20221101 09:00', '20221101 09:30'), ('User2', '20221101 11:00', '20221101 11:30'), ('User2', '20221101 11:32', '20221101 12:00'), ('User2', '20221101 12:04', '20221101 12:30'), ('User3', '20221101 08:00', '20221101 09:00'), ('User3', '20221101 08:00', '20221101 08:30'), ('User3', '20221101 08:30', '20221101 09:00'), ('User3', '20221101 09:30', '20221101 09:30'); GO DECLARE @username AS VARCHAR(10) = 'User1', @s AS DATETIME2 = '20221101 11:00', @e AS DATETIME2 = '20221101 12:00'; -- Check plan with current index SELECT username, starttime, endtime FROM dbo.Sessions WHERE username = @username AND starttime <= @e AND endtime >= @s; GO -- Redefine indexing and check plan again DROP INDEX IF EXISTS idx_nc_usr_st_ed ON dbo.Sessions; CREATE INDEX idx_nc_usr_ed_st ON dbo.Sessions(username, endtime, starttime); ---------------------------------------------------------------------- -- Max Concurrent Intervals ---------------------------------------------------------------------- -- Creating and Populating Sessions SET NOCOUNT ON; USE tempdb; DROP TABLE IF EXISTS dbo.Sessions; CREATE TABLE dbo.Sessions ( keycol INT NOT NULL, app VARCHAR(10) NOT NULL, starttime DATETIME2 NOT NULL, endtime DATETIME2 NOT NULL, CONSTRAINT PK_Sessions PRIMARY KEY(keycol), CHECK(endtime > starttime) ); GO CREATE UNIQUE INDEX idx_start ON dbo.Sessions(app, starttime, keycol); CREATE UNIQUE INDEX idx_end ON dbo.Sessions(app, endtime, keycol); -- Sample data INSERT INTO dbo.Sessions(keycol, app, starttime, endtime) VALUES (2, 'app1', '20221101 08:30', '20221101 10:30'), (3, 'app1', '20221101 08:30', '20221101 08:45'), (5, 'app1', '20221101 09:00', '20221101 09:30'), (7, 'app1', '20221101 09:15', '20221101 10:30'), (11, 'app1', '20221101 09:15', '20221101 09:30'), (13, 'app1', '20221101 10:30', '20221101 14:30'), (17, 'app1', '20221101 10:45', '20221101 11:30'), (19, 'app1', '20221101 11:00', '20221101 12:30'), (23, 'app2', '20221101 08:30', '20221101 08:45'), (29, 'app2', '20221101 09:00', '20221101 09:30'), (31, 'app2', '20221101 11:45', '20221101 12:00'), (37, 'app2', '20221101 12:30', '20221101 14:00'), (41, 'app2', '20221101 12:45', '20221101 13:30'), (43, 'app2', '20221101 13:00', '20221101 14:00'), (47, 'app2', '20221101 14:00', '20221101 16:30'), (53, 'app2', '20221101 15:30', '20221101 17:00'); GO -- Task: compute max concurrent intervals per app -- Solution WITH C1 AS ( SELECT keycol, app, starttime AS ts, +1 AS type FROM dbo.Sessions UNION ALL SELECT keycol, app, endtime AS ts, -1 AS type FROM dbo.Sessions ), C2 AS ( SELECT *, SUM(type) OVER(PARTITION BY app ORDER BY ts, type, keycol ROWS UNBOUNDED PRECEDING) AS cnt FROM C1 ) SELECT app, MAX(cnt) AS mx FROM C2 GROUP BY app; ---------------------------------------------------------------------- -- Packing Intervals ---------------------------------------------------------------------- -- Task: pack groups of intersecting intervals per app -- Solution WITH C1 AS ( SELECT keycol, app, starttime AS ts, +1 AS type FROM dbo.Sessions UNION ALL SELECT keycol, app, endtime AS ts, -1 AS type FROM dbo.Sessions ), C2 AS ( SELECT *, SUM(type) OVER(PARTITION BY app ORDER BY ts, type DESC, keycol ROWS UNBOUNDED PRECEDING) AS cnt FROM C1 ), C3 AS ( SELECT *, (ROW_NUMBER() OVER(PARTITION BY app ORDER BY ts, type DESC, keycol) - 1) / 2 + 1 AS p FROM C2 WHERE (type = 1 AND cnt = 1) OR cnt = 0 ) SELECT app, MIN(ts) AS starttime, MAX(ts) AS endtime FROM C3 GROUP BY app, p; --------------------------------------------------------------------- -- Supply and demand -- See article series: https://sqlperformance.com/?s=supply+and+demand+itzik --------------------------------------------------------------------- -- DDL and sample data DROP TABLE IF EXISTS dbo.Auctions; CREATE TABLE dbo.Auctions ( ID INT NOT NULL IDENTITY(1, 1) CONSTRAINT pk_Auctions PRIMARY KEY CLUSTERED, Code CHAR(1) NOT NULL CONSTRAINT ck_Auctions_Code CHECK (Code = 'D' OR Code = 'S'), Quantity DECIMAL(19, 6) NOT NULL CONSTRAINT ck_Auctions_Quantity CHECK (Quantity > 0) ); SET NOCOUNT ON; DELETE FROM dbo.Auctions; SET IDENTITY_INSERT dbo.Auctions ON; INSERT INTO dbo.Auctions(ID, Code, Quantity) VALUES (1, 'D', 5.0), (2, 'D', 3.0), (3, 'D', 8.0), (5, 'D', 2.0), (6, 'D', 8.0), (7, 'D', 4.0), (8, 'D', 2.0), (1000, 'S', 8.0), (2000, 'S', 6.0), (3000, 'S', 2.0), (4000, 'S', 2.0), (5000, 'S', 4.0), (6000, 'S', 3.0), (7000, 'S', 2.0); SET IDENTITY_INSERT dbo.Auctions OFF; CREATE UNIQUE NONCLUSTERED INDEX idx_Code_ID_i_Quantity ON dbo.Auctions(Code, ID) INCLUDE(Quantity); CREATE NONCLUSTERED COLUMNSTORE INDEX idx_cs ON dbo.Auctions(ID) WHERE ID = -1 AND ID = -2; --------------------------------------------------------------------- -- Cursor-based solution, linear scaling, ~3 sec / 100K rows, ~12 sec / 400K rows --------------------------------------------------------------------- SET NOCOUNT ON; DROP TABLE IF EXISTS #PairingsCursor; CREATE TABLE #PairingsCursor ( DemandID INT NOT NULL, SupplyID INT NOT NULL, TradeQuantity DECIMAL(19, 6) NOT NULL ); DECLARE curDemand CURSOR LOCAL FORWARD_ONLY READ_ONLY FOR SELECT ID AS DemandID, Quantity FROM dbo.Auctions WHERE Code = 'D' ORDER BY ID; DECLARE curSupply CURSOR LOCAL FORWARD_ONLY READ_ONLY FOR SELECT ID AS SupplyID, Quantity FROM dbo.Auctions WHERE Code = 'S' ORDER BY ID; DECLARE @DemandID AS INT, @DemandQuantity AS DECIMAL(19, 6), @SupplyID AS INT, @SupplyQuantity AS DECIMAL(19, 6); OPEN curDemand; FETCH NEXT FROM curDemand INTO @DemandID, @DemandQuantity; OPEN curSupply; FETCH NEXT FROM curSupply INTO @SupplyID, @SupplyQuantity; WHILE @@FETCH_STATUS = 0 BEGIN IF @DemandQuantity <= @SupplyQuantity BEGIN INSERT #PairingsCursor(DemandID, SupplyID, TradeQuantity) VALUES(@DemandID, @SupplyID, @DemandQuantity); SET @SupplyQuantity -= @DemandQuantity; FETCH NEXT FROM curDemand INTO @DemandID, @DemandQuantity; END; ELSE BEGIN IF @SupplyQuantity > 0 BEGIN INSERT #PairingsCursor(DemandID, SupplyID, TradeQuantity) VALUES(@DemandID, @SupplyID, @SupplyQuantity); SET @DemandQuantity -= @SupplyQuantity; END; FETCH NEXT FROM curSupply INTO @SupplyID, @SupplyQuantity; END; END; CLOSE curDemand; DEALLOCATE curDemand; CLOSE curSupply; DEALLOCATE curSupply; SELECT * FROM #PairingsCursor; GO --------------------------------------------------------------------- -- Set-based using classic interval intersection, very slow, exponential scaling, 931 sec / 400K rows --------------------------------------------------------------------- -- Drop temp tables if exist DROP TABLE IF EXISTS #MyPairings, #Demand, #Supply; GO WITH D0 AS -- D0 computes running demand as EndDemand ( SELECT ID, Quantity, SUM(Quantity) OVER(ORDER BY ID ROWS UNBOUNDED PRECEDING) AS EndDemand FROM dbo.Auctions WHERE Code = 'D' ), -- D extracts prev EndDemand as StartDemand, expressing start-end demand as an interval D AS ( SELECT ID, Quantity, EndDemand - Quantity AS StartDemand, EndDemand FROM D0 ) SELECT ID, Quantity, CAST(ISNULL(StartDemand, 0.0) AS DECIMAL(19, 6)) AS StartDemand, CAST(ISNULL(EndDemand, 0.0) AS DECIMAL(19, 6)) AS EndDemand INTO #Demand FROM D; WITH S0 AS -- S0 computes running supply as EndSupply ( SELECT ID, Quantity, SUM(Quantity) OVER(ORDER BY ID ROWS UNBOUNDED PRECEDING) AS EndSupply FROM dbo.Auctions WHERE Code = 'S' ), -- S extracts prev EndSupply as StartSupply, expressing start-end supply as an interval S AS ( SELECT ID, Quantity, EndSupply - Quantity AS StartSupply, EndSupply FROM S0 ) SELECT ID, Quantity, CAST(ISNULL(StartSupply, 0.0) AS DECIMAL(19, 6)) AS StartSupply, CAST(ISNULL(EndSupply, 0.0) AS DECIMAL(19, 6)) AS EndSupply INTO #Supply FROM S; CREATE UNIQUE CLUSTERED INDEX idx_cl_ES_ID ON #Supply(EndSupply, ID); -- Outer query identifies trades as the overlapping segments of the intersecting intervals -- In the intersecting demand and supply intervals the trade quantity is then -- LEAST(EndDemand, EndSupply) - GREATEST(StartDemsnad, StartSupply) SELECT D.ID AS DemandID, S.ID AS SupplyID, CASE WHEN EndDemand < EndSupply THEN EndDemand ELSE EndSupply END - CASE WHEN StartDemand > StartSupply THEN StartDemand ELSE StartSupply END AS TradeQuantity INTO #MyPairings FROM #Demand AS D INNER JOIN #Supply AS S WITH (FORCESEEK) ON D.StartDemand < S.EndSupply AND D.EndDemand > S.StartSupply; SELECT * FROM #PairingsCursor; GO --------------------------------------------------------------------- -- Set-based using revised interval intersection, by Kamil Kosno, linear scaling, 1.34 sec / 400K rows --------------------------------------------------------------------- SET NOCOUNT ON; DROP TABLE IF EXISTS #supply, #demand; GO CREATE TABLE #demand ( DemandID INT NOT NULL, DemandQuantity DECIMAL(19, 6) NOT NULL, QuantityFromDemand DECIMAL(19, 6) NOT NULL, QuantityToDemand DECIMAL(19, 6) NOT NULL ); CREATE TABLE #supply ( SupplyID INT NOT NULL, QuantityFromSupply DECIMAL(19, 6) NOT NULL, QuantityToSupply DECIMAL(19,6) NOT NULL ); WITH demand AS ( SELECT a.ID AS DemandID, a.Quantity AS DemandQuantity, SUM(a.Quantity) OVER(ORDER BY ID ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) - a.Quantity AS QuantityFromDemand, SUM(a.Quantity) OVER(ORDER BY ID ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS QuantityToDemand FROM dbo.Auctions AS a WHERE Code = 'D' ) INSERT INTO #demand ( DemandID, DemandQuantity, QuantityFromDemand, QuantityToDemand ) SELECT d.DemandID, d.DemandQuantity, d.QuantityFromDemand, d.QuantityToDemand FROM demand AS d; WITH supply AS ( SELECT a.ID AS SupplyID, SUM(a.Quantity) OVER(ORDER BY ID ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) - a.Quantity AS QuantityFromSupply, SUM(a.Quantity) OVER(ORDER BY ID ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS QuantityToSupply FROM dbo.Auctions AS a WHERE Code = 'S' ) INSERT INTO #supply ( SupplyID, QuantityFromSupply, QuantityToSupply ) SELECT s.SupplyID, s.QuantityFromSupply, s.QuantityToSupply FROM supply AS s; CREATE UNIQUE INDEX ix_1 ON #demand(QuantityFromDemand) INCLUDE(DemandID, DemandQuantity, QuantityToDemand); CREATE UNIQUE INDEX ix_1 ON #supply(QuantityFromSupply) INCLUDE(SupplyID, QuantityToSupply); CREATE NONCLUSTERED COLUMNSTORE INDEX nci ON #demand(DemandID) WHERE DemandID is null; DROP TABLE IF EXISTS #myPairings; CREATE TABLE #myPairings ( DemandID INT NOT NULL, SupplyID INT NOT NULL, TradeQuantity DECIMAL(19, 6) NOT NULL ); /* -- Slow if using one query with OR INSERT INTO #myPairings(DemandID, SupplyID, TradeQuantity) SELECT d.DemandID, s.SupplyID, d.DemandQuantity - CASE WHEN d.QuantityFromDemand < s.QuantityFromSupply THEN s.QuantityFromSupply - d.QuantityFromDemand ELSE 0 end - CASE WHEN s.QuantityToSupply < d.QuantityToDemand THEN d.QuantityToDemand - s.QuantityToSupply ELSE 0 END AS TradeQuantity FROM #demand AS d INNER JOIN #supply AS s ON (D.QuantityFromDemand >= S.QuantityFromSupply AND D.QuantityFromDemand < S.QuantityToSupply) OR (S.QuantityFromSupply > D.QuantityFromDemand AND S.QuantityFromSupply < D.QuantityToDemand); */ INSERT INTO #myPairings(DemandID, SupplyID, TradeQuantity) SELECT d.DemandID, s.SupplyID, d.DemandQuantity - CASE WHEN d.QuantityFromDemand < s.QuantityFromSupply THEN s.QuantityFromSupply - d.QuantityFromDemand ELSE 0 end - CASE WHEN s.QuantityToSupply < d.QuantityToDemand THEN d.QuantityToDemand - s.QuantityToSupply ELSE 0 END AS TradeQuantity FROM #demand AS d INNER JOIN #supply AS s ON (d.QuantityFromDemand < s.QuantityToSupply AND s.QuantityFromSupply <= d.QuantityFromDemand) UNION ALL SELECT d.DemandID, s.SupplyID, d.DemandQuantity - CASE WHEN d.QuantityFromDemand < s.QuantityFromSupply THEN s.QuantityFromSupply - d.QuantityFromDemand ELSE 0 END - CASE WHEN s.QuantityToSupply < d.QuantityToDemand THEN d.QuantityToDemand - s.QuantityToSupply ELSE 0 END AS TradeQuantity FROM #supply AS s INNER JOIN #demand AS d ON (s.QuantityFromSupply < d.QuantityToDemand AND d.QuantityFromDemand < s.QuantityFromSupply); SELECT * FROM #myPairings; GO --------------------------------------------------------------------- -- Brian Walker, 0.531 sec / 400K rows --------------------------------------------------------------------- DROP TABLE IF EXISTS #MyPairings; CREATE TABLE #MyPairings ( DemandID INT NOT NULL, SupplyID INT NOT NULL, TradeQuantity DECIMAL(19,06) NOT NULL ); WITH D AS ( SELECT A.ID, SUM(A.Quantity) OVER (PARTITION BY A.Code ORDER BY A.ID ROWS UNBOUNDED PRECEDING) AS Running FROM dbo.Auctions AS A WHERE A.Code = 'D' ), S AS ( SELECT A.ID, SUM(A.Quantity) OVER (PARTITION BY A.Code ORDER BY A.ID ROWS UNBOUNDED PRECEDING) AS Running FROM dbo.Auctions AS A WHERE A.Code = 'S' ), W AS ( SELECT D.ID AS DemandID, S.ID AS SupplyID, ISNULL(D.Running, S.Running) AS Running FROM D FULL JOIN S ON D.Running = S.Running ), Z AS ( SELECT CASE WHEN W.DemandID IS NOT NULL THEN W.DemandID ELSE MIN(W.DemandID) OVER (ORDER BY W.Running ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) END AS DemandID, CASE WHEN W.SupplyID IS NOT NULL THEN W.SupplyID ELSE MIN(W.SupplyID) OVER (ORDER BY W.Running ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) END AS SupplyID, W.Running FROM W ) INSERT #MyPairings( DemandID, SupplyID, TradeQuantity ) SELECT Z.DemandID, Z.SupplyID, Z.Running - ISNULL(LAG(Z.Running) OVER (ORDER BY Z.Running), 0.0) AS TradeQuantity FROM Z WHERE Z.DemandID IS NOT NULL AND Z.SupplyID IS NOT NULL; SELECT * FROM #MyPairings; GO --------------------------------------------------------------------- -- Peter Larsson, 0.420 / 400K rows --------------------------------------------------------------------- DROP TABLE IF EXISTS #MyPairings; WITH cteSource(ID, Code, RunningQuantity) AS ( SELECT ID, Code, SUM(Quantity) OVER (ORDER BY ID) AS RunningQuantity FROM dbo.Auctions WHERE Code = 'S' UNION ALL SELECT ID, Code, SUM(Quantity) OVER (ORDER BY ID) AS RunningQuantity FROM dbo.Auctions WHERE Code = 'D' ) SELECT DemandID, SupplyID, TradeQuantity INTO #MyPairings FROM ( SELECT MIN(CASE WHEN Code = 'D' THEN ID ELSE NULL END) OVER (ORDER BY RunningQuantity, Code ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS DemandID, MIN(CASE WHEN Code = 'S' THEN ID ELSE NULL END) OVER (ORDER BY RunningQuantity, Code ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS SupplyID, RunningQuantity - COALESCE(LAG(RunningQuantity) OVER (ORDER BY RunningQuantity, Code), 0.0) AS TradeQuantity FROM cteSource ) AS d WHERE DemandID IS NOT NULL AND SupplyID IS NOT NULL AND TradeQuantity > 0.0; SELECT * FROM #MyPairings; GO --------------------------------------------------------------------- -- Paul White, 0.404 sec / 400K rows --------------------------------------------------------------------- DROP TABLE IF EXISTS #MyPairings; CREATE TABLE #MyPairings ( DemandID integer NOT NULL, SupplyID integer NOT NULL, TradeQuantity decimal(19, 6) NOT NULL ); GO INSERT #MyPairings WITH (TABLOCK) ( DemandID, SupplyID, TradeQuantity ) SELECT Q3.DemandID, Q3.SupplyID, Q3.TradeQuantity FROM ( SELECT Q2.DemandID, Q2.SupplyID, TradeQuantity = -- Interval overlap CASE WHEN Q2.Code = 'S' THEN CASE WHEN Q2.CumDemand >= Q2.IntEnd THEN Q2.IntLength WHEN Q2.CumDemand > Q2.IntStart THEN Q2.CumDemand - Q2.IntStart ELSE 0.0 END WHEN Q2.Code = 'D' THEN CASE WHEN Q2.CumSupply >= Q2.IntEnd THEN Q2.IntLength WHEN Q2.CumSupply > Q2.IntStart THEN Q2.CumSupply - Q2.IntStart ELSE 0.0 END END FROM ( SELECT Q1.Code, Q1.IntStart, Q1.IntEnd, Q1.IntLength, DemandID = MAX(IIF(Q1.Code = 'D', Q1.ID, 0)) OVER ( ORDER BY Q1.IntStart, Q1.ID ROWS UNBOUNDED PRECEDING), SupplyID = MAX(IIF(Q1.Code = 'S', Q1.ID, 0)) OVER ( ORDER BY Q1.IntStart, Q1.ID ROWS UNBOUNDED PRECEDING), CumSupply = SUM(IIF(Q1.Code = 'S', Q1.IntLength, 0)) OVER ( ORDER BY Q1.IntStart, Q1.ID ROWS UNBOUNDED PRECEDING), CumDemand = SUM(IIF(Q1.Code = 'D', Q1.IntLength, 0)) OVER ( ORDER BY Q1.IntStart, Q1.ID ROWS UNBOUNDED PRECEDING) FROM ( -- Demand intervals SELECT A.ID, A.Code, IntStart = SUM(A.Quantity) OVER ( ORDER BY A.ID ROWS UNBOUNDED PRECEDING) - A.Quantity, IntEnd = SUM(A.Quantity) OVER ( ORDER BY A.ID ROWS UNBOUNDED PRECEDING), IntLength = A.Quantity FROM dbo.Auctions AS A WHERE A.Code = 'D' UNION ALL -- Supply intervals SELECT A.ID, A.Code, IntStart = SUM(A.Quantity) OVER ( ORDER BY A.ID ROWS UNBOUNDED PRECEDING) - A.Quantity, IntEnd = SUM(A.Quantity) OVER ( ORDER BY A.ID ROWS UNBOUNDED PRECEDING), IntLength = A.Quantity FROM dbo.Auctions AS A WHERE A.Code = 'S' ) AS Q1 ) AS Q2 ) AS Q3 WHERE Q3.TradeQuantity > 0; SELECT * FROM #MyPairings; GO --------------------------------------------------------------------- -- Gaps and islands --------------------------------------------------------------------- -- The following table holds daily closing stock rates: SET NOCOUNT ON; USE tempdb; IF OBJECT_ID('dbo.Stocks') IS NOT NULL DROP TABLE dbo.Stocks; CREATE TABLE dbo.Stocks ( stockid VARCHAR(10) NOT NULL, dt DATE NOT NULL, val INT NOT NULL, CONSTRAINT PK_Stocks PRIMARY KEY(stockid, dt) ); GO INSERT INTO dbo.Stocks VALUES ('STOCK1', '2022-08-01', 13), ('STOCK1', '2022-08-02', 14), ('STOCK1', '2022-08-03', 17), ('STOCK1', '2022-08-04', 40), ('STOCK1', '2022-08-05', 45), ('STOCK1', '2022-08-06', 52), ('STOCK1', '2022-08-07', 56), ('STOCK1', '2022-08-08', 60), ('STOCK1', '2022-08-09', 70), ('STOCK1', '2022-08-10', 30), ('STOCK1', '2022-08-11', 29), ('STOCK1', '2022-08-12', 35), ('STOCK1', '2022-08-13', 40), ('STOCK1', '2022-08-14', 45), ('STOCK1', '2022-08-15', 60), ('STOCK1', '2022-08-16', 60), ('STOCK1', '2022-08-17', 55), ('STOCK1', '2022-08-18', 60), ('STOCK1', '2022-08-19', 20), ('STOCK1', '2022-08-20', 15), ('STOCK1', '2022-08-21', 20), ('STOCK1', '2022-08-22', 30), ('STOCK1', '2022-08-23', 40), ('STOCK1', '2022-08-24', 20), ('STOCK1', '2022-08-25', 60), ('STOCK1', '2022-08-26', 80), ('STOCK1', '2022-08-27', 70), ('STOCK1', '2022-08-28', 70), ('STOCK1', '2022-08-29', 40), ('STOCK1', '2022-08-30', 30), ('STOCK1', '2022-08-31', 10), ('STOCK2', '2022-08-01', 3), ('STOCK2', '2022-08-02', 4), ('STOCK2', '2022-08-03', 7), ('STOCK2', '2022-08-04', 30), ('STOCK2', '2022-08-05', 35), ('STOCK2', '2022-08-06', 42), ('STOCK2', '2022-08-07', 46), ('STOCK2', '2022-08-08', 50), ('STOCK2', '2022-08-09', 60), ('STOCK2', '2022-08-10', 20), ('STOCK2', '2022-08-11', 19), ('STOCK2', '2022-08-12', 25), ('STOCK2', '2022-08-13', 30), ('STOCK2', '2022-08-14', 35), ('STOCK2', '2022-08-15', 50), ('STOCK2', '2022-08-16', 50), ('STOCK2', '2022-08-17', 45), ('STOCK2', '2022-08-18', 50), ('STOCK2', '2022-08-19', 10), ('STOCK2', '2022-08-20', 5), ('STOCK2', '2022-08-21', 10), ('STOCK2', '2022-08-22', 20), ('STOCK2', '2022-08-23', 30), ('STOCK2', '2022-08-24', 10), ('STOCK2', '2022-08-25', 50), ('STOCK2', '2022-08-26', 70), ('STOCK2', '2022-08-27', 60), ('STOCK2', '2022-08-28', 60), ('STOCK2', '2022-08-29', 30), ('STOCK2', '2022-08-30', 20), ('STOCK2', '2022-08-31', 1); -- Your task is to identify the ranges of consecutive dates where the stock value -- was >= 50. You can assume that there are no gaps between existing dates. -- On weekends and holidays the last known closing rates are stored. -- For each qualifying period return the stock ID, start and end dates, and maximum stock value within the period. -- Solution for fixed intervals, e.g., 1 day WITH C AS ( SELECT stockid, dt, val, DATEADD(day, -1 * DENSE_RANK() OVER(PARTITION BY stockid ORDER BY dt), dt) AS grp FROM dbo.Stocks WHERE val >= 50 ) SELECT stockid, MIN(dt) AS startdate, MAX(dt) AS enddate, MAX(val) as maxvalue FROM C GROUP BY stockid, grp ORDER BY stockid, startdate; -- Nonfixed intervals, e.g., not a gap if < 7 days WITH C1 AS ( SELECT *, CASE WHEN DATEDIFF(day, LAG(dt) OVER(PARTITION BY stockid ORDER BY dt), dt) < 7 THEN 0 ELSE 1 END AS isstart FROM dbo.Stocks WHERE val >= 50 ), C2 AS ( SELECT *, SUM(isstart) OVER(PARTITION BY stockid ORDER BY dt ROWS UNBOUNDED PRECEDING) AS grp FROM C1 ) SELECT stockid, MIN(dt) AS startdate, MAX(dt) AS enddate, MAX(val) as maxvalue FROM C2 GROUP BY stockid, grp ORDER BY stockid, startdate; --------------------------------------------------------------------- -- TOP N per group --------------------------------------------------------------------- USE TSQLV6; -- POC index CREATE UNIQUE INDEX idx_poc ON Sales.Orders(custid, orderdate DESC, orderid DESC) INCLUDE(empid); -- APPLY, POC + High Density SELECT C.custid, A.* FROM Sales.Customers AS C CROSS APPLY ( SELECT TOP (3) orderid, orderdate, empid FROM Sales.Orders AS O WHERE O.custid = C.custid ORDER BY orderdate DESC, orderid DESC ) AS A; -- ROW_NUMBER, no POC or low density WITH C AS ( SELECT ROW_NUMBER() OVER( PARTITION BY custid ORDER BY orderdate DESC, orderid DESC) AS rownum, orderid, orderdate, custid, empid FROM Sales.Orders ) SELECT custid, orderdate, orderid, empid FROM C WHERE rownum <= 3; -- Remove index DROP INDEX Sales.Orders.idx_POC; --------------------------------------------------------------------- -- Working with a number series generator --------------------------------------------------------------------- -- Paul's solution -- Helper columnstore table DROP TABLE IF EXISTS dbo.CS; -- 64K rows (enough for 4B rows when cross joined) -- column 1 is always zero -- column 2 is (1...65536) SELECT -- type as integer NOT NULL -- (everything is normalized to 64 bits in columnstore/batch mode anyway) n1 = ISNULL(CONVERT(integer, 0), 0), n2 = ISNULL(CONVERT(integer, N.rn), 0) INTO dbo.CS FROM ( SELECT rn = ROW_NUMBER() OVER (ORDER BY @@SPID) FROM master.dbo.spt_values AS SV1 CROSS JOIN master.dbo.spt_values AS SV2 ORDER BY rn ASC OFFSET 0 ROWS FETCH NEXT 65536 ROWS ONLY ) AS N; -- Single compressed rowgroup of 65,536 rows CREATE CLUSTERED COLUMNSTORE INDEX CCI ON dbo.CS WITH (MAXDOP = 1); GO -- The function CREATE OR ALTER FUNCTION dbo.GetNums_SQLkiwi ( @low bigint = 1, @high bigint ) RETURNS table AS RETURN SELECT N.rn, n = @low - 1 + N.rn, op = @high + 1 - N.rn FROM ( SELECT -- Use @@TRANCOUNT instead of @@SPID if you like all your queries serial rn = ROW_NUMBER() OVER (ORDER BY @@SPID ASC) FROM dbo.CS AS N1 JOIN dbo.CS AS N2 -- Batch mode hash cross join -- Integer not null data type avoid hash probe residual -- This is always 0 = 0 ON N2.n1 = N1.n1 WHERE -- Try to avoid SQRT on negative numbers and enable simplification -- to single constant scan if @low > @high (with literals) -- No start-up filters in batch mode @high >= @low -- Coarse filter: -- Limit each side of the cross join to SQRT(target number of rows) -- IIF avoids SQRT on negative numbers with parameters AND N1.n2 <= CONVERT(integer, CEILING(SQRT(CONVERT(float, IIF(@high >= @low, @high - @low + 1, 0))))) AND N2.n2 <= CONVERT(integer, CEILING(SQRT(CONVERT(float, IIF(@high >= @low, @high - @low + 1, 0))))) ) AS N WHERE -- Precise filter: -- Batch mode filter the limited cross join to the exact number of rows needed -- Avoids the optimizer introducing a row-mode Top with following row mode compute scalar @low - 2 + N.rn < @high; GO -- disable showing plan -- 2 sec, uses batch mode SELECT MAX(n) AS mx FROM dbo.GetNums_SQLkiwi(1, 100000000) OPTION(MAXDOP 1); -- 6 sec, uses batch mode DECLARE @n AS BIGINT; SELECT @n = n FROM dbo.GetNums_SQLkiwi(1, 100000000) ORDER BY n OPTION(MAXDOP 1); -- GetNumsAlanCharlieItzikBatch DROP TABLE IF EXISTS dbo.BatchMe; GO CREATE TABLE dbo.BatchMe(col1 INT NOT NULL, INDEX idx_cs CLUSTERED COLUMNSTORE); GO CREATE OR ALTER FUNCTION dbo.GetNumsAlanCharlieItzikBatch(@low AS BIGINT = 1, @high AS BIGINT) RETURNS TABLE AS RETURN WITH L0 AS ( SELECT 1 AS c FROM (VALUES(1),(1),(1),(1),(1),(1),(1),(1), (1),(1),(1),(1),(1),(1),(1),(1)) AS D(c) ), L1 AS ( SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B ), L2 AS ( SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B ), L3 AS ( SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B ), Nums AS ( SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS rownum FROM L3 ) SELECT TOP(@high - @low + 1) rownum AS rn, @high + 1 - rownum AS op, @low - 1 + rownum AS n FROM Nums LEFT OUTER JOIN dbo.BatchMe ON 1 = 0 ORDER BY rownum; GO -- disable showing plan -- 6 sec, uses batch mode SELECT MAX(n) AS mx FROM dbo.GetNumsAlanCharlieItzikBatch(1, 100000000) OPTION(MAXDOP 1); -- 9 sec, uses batch mode DECLARE @n AS BIGINT; SELECT @n = n FROM dbo.GetNumsAlanCharlieItzikBatch(1, 100000000) ORDER BY n OPTION(MAXDOP 1); -- GENERATE_SERIES -- No sorting, no duplicate removal in plan SELECT DISTINCT value FROM GENERATE_SERIES(1, 10) ORDER BY value; -- 11 sec, row mode SELECT MAX(value) AS mx FROM GENERATE_SERIES(1, 100000000); -- 6 sec, row mode DECLARE @n AS BIGINT; SELECT @n = value FROM GENERATE_SERIES(1, 100000000) ORDER BY value; -- Involve columnstore table to get batch mode -- 4 sec, batch mode SELECT MAX(value) AS mx FROM GENERATE_SERIES(1, 100000000) LEFT OUTER JOIN dbo.BatchMe ON 1 = 0; -- 6 sec, row mode DECLARE @n AS BIGINT; SELECT @n = value FROM GENERATE_SERIES(1, 100000000) LEFT OUTER JOIN dbo.BatchMe ON 1 = 0 ORDER BY value; --------------------------------------------------------------------- -- Common bugs, pitfalls and best practices (as time permits) --------------------------------------------------------------------- --------------------------------------------------------------------- -- Constant folding --------------------------------------------------------------------- SELECT orderid + 5 - 1 AS myorderid FROM dbo.Orders ORDER BY myorderid; -- sort SELECT 5 + orderid - 1 AS myorderid FROM dbo.Orders ORDER BY myorderid; -- sort SELECT 5 - 1 + orderid AS myorderid FROM dbo.Orders ORDER BY myorderid; -- no sort GO -- GetNumsBad CREATE FUNCTION dbo.GetNumsBad(@low AS BIGINT = 1, @high AS BIGINT) RETURNS TABLE AS RETURN WITH L0 AS ( SELECT 1 AS c FROM (VALUES(1),(1),(1),(1),(1),(1),(1),(1), (1),(1),(1),(1),(1),(1),(1),(1)) AS D(c) ), L1 AS ( SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B ), L2 AS ( SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B ), L3 AS ( SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B ), Nums AS ( SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS rownum FROM L3 ) SELECT TOP(@high - @low + 1) rownum AS rn, @high + 1 - rownum AS op, @low + rownum - 1 AS n FROM Nums ORDER BY rownum; GO -- GetNumsGood CREATE FUNCTION dbo.GetNumsGood(@low AS BIGINT = 1, @high AS BIGINT) RETURNS TABLE AS RETURN WITH L0 AS ( SELECT 1 AS c FROM (VALUES(1),(1),(1),(1),(1),(1),(1),(1), (1),(1),(1),(1),(1),(1),(1),(1)) AS D(c) ), L1 AS ( SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B ), L2 AS ( SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B ), L3 AS ( SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B ), Nums AS ( SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS rownum FROM L3 ) SELECT TOP(@high - @low + 1) rownum AS rn, @high + 1 - rownum AS op, @low - 1 + rownum AS n FROM Nums ORDER BY rownum; GO SELECT n FROM dbo.GetNumsBad(1000001, 2000000) ORDER BY n; -- sort, @low + rownum - 1 AS n SELECT n FROM dbo.GetNumsGood(1000001, 2000000) ORDER BY n; -- no sort, @low - 1 + rownum AS n -- Can intentionally use erroneous expression to prevents const folding SELECT orderid, ROW_NUMBER() OVER(ORDER BY 'constant') AS n FROM dbo.Orders; -- const err SELECT orderid, ROW_NUMBER() OVER(ORDER BY 'cons' + 'tant') AS n FROM dbo.Orders; -- const err SELECT orderid, ROW_NUMBER() OVER(ORDER BY 1/0) AS n FROM dbo.Orders; -- no error --------------------------------------------------------------------- -- Subqueries --------------------------------------------------------------------- -- Substitution bug USE tempdb; IF OBJECT_ID(N'dbo.T1', N'U') IS NOT NULL DROP TABLE dbo.T1; IF OBJECT_ID(N'dbo.T2', N'U') IS NOT NULL DROP TABLE dbo.T2; CREATE TABLE dbo.T1(col1 INT NOT NULL); CREATE TABLE dbo.T2(col2 INT NOT NULL); GO INSERT INTO dbo.T1(col1) VALUES(1),(2),(3); INSERT INTO dbo.T2(col2) VALUES(2); GO -- Which values will this query return? SELECT col1 FROM dbo.T1 WHERE col1 IN(SELECT col1 FROM dbo.T2); -- :-( GO -- The safe way SELECT col1 FROM dbo.T1 WHERE col1 IN(SELECT T2.col1 FROM dbo.T2); -- :-) GO -- Three-valued-logic bug DROP TABLE dbo.T1, dbo.T2; CREATE TABLE dbo.T1(col1 INT NULL); CREATE TABLE dbo.T2(col1 INT NULL); GO INSERT INTO dbo.T1(col1) VALUES(2); INSERT INTO dbo.T1(col1) VALUES(NULL); INSERT INTO dbo.T2(col1) VALUES(2); INSERT INTO dbo.T2(col1) VALUES(3); GO -- Which values will this query return? SELECT col1 FROM dbo.T2 WHERE col1 NOT IN(SELECT col1 FROM dbo.T1); -- :-( -- The safe ways SELECT col1 FROM dbo.T2 WHERE NOT EXISTS(SELECT * FROM dbo.T1 WHERE T1.col1 = T2.col1); -- :-) --------------------------------------------------------------------- -- Joins --------------------------------------------------------------------- USE TSQLV6; -- Filtering Attributes from Non-Preserved Side of Outer Join -- Bug SELECT C.custid, C.companyname, O.orderid, O.orderdate FROM Sales.Customers AS C LEFT OUTER JOIN Sales.Orders AS O ON C.custid = O.custid WHERE O.orderdate >= '20220101'; -- Fix SELECT C.custid, C.companyname, O.orderid, O.orderdate FROM Sales.Customers AS C LEFT OUTER JOIN Sales.Orders AS O ON C.custid = O.custid AND O.orderdate >= '20220101'; -- Using Outer Joins in a Multi-Join Query -- Inner join NULLifies outer join SELECT C.custid, O.orderid, OD.productid, OD.qty FROM Sales.Customers AS C LEFT OUTER JOIN Sales.Orders AS O ON C.custid = O.custid INNER JOIN Sales.OrderDetails AS OD ON O.orderid = OD.orderid; -- All outer joins SELECT C.custid, O.orderid, OD.productid, OD.qty FROM Sales.Customers AS C LEFT OUTER JOIN Sales.Orders AS O ON C.custid = O.custid LEFT OUTER JOIN Sales.OrderDetails AS OD ON O.orderid = OD.orderid; -- Nested joins SELECT C.custid, O.orderid, OD.productid, OD.qty FROM Sales.Customers AS C LEFT OUTER JOIN (Sales.Orders AS O INNER JOIN Sales.OrderDetails AS OD ON O.orderid = OD.orderid) ON C.custid = O.custid;