--------------------------------------------------------------------- -- Beware of Nondeterministic T-SQL Code -- © Itzik Ben-Gan --------------------------------------------------------------------- -- Sample databases TSQLV6: http://tsql.Lucient.com/SampleDatabases/TSQLV6.zip --------------------------------------------------------------------- -- Nondeterministic order --------------------------------------------------------------------- --------------------------------------------------------------------- -- Paging example with OFFSET-FETCH -- Two pages with same row -- Also applicable to other tools such as window functions --------------------------------------------------------------------- -- Order 11069 appears in both pages -- Here plan scans data from index in descending order (to illustrate, run SELECT orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC, orderid DESC) -- Effective order: orderdate DESC, orderid DESC -- Rows #1 through #10 returned -- Orders with date 2022-05-04 positioned #9: 11069 (included), #10: 11068 (included), #11: 11067 (excluded) USE TSQLV6; SELECT orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC OFFSET 0 ROWS FETCH NEXT 10 ROWS ONLY; -- Here plan scans data from index ordered: false before sorting, but in practice in ascending order (to illustrate, run SELECT orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate, orderid) -- Due to the Top N Sort, effectively, choise done based on orderdate DESC, orderid ASC ordering (to illustrate, run SELECT orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC, orderid) -- Effective order: orderdate DESC, orderid ASC -- Rows #11 through #20 returned -- Orders with date 2022-05-04 positioned #9: 11067 (excluded), #10: 11068 (excluded), #11: 11069 (included) SELECT orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC OFFSET 10 ROWS FETCH NEXT 10 ROWS ONLY; -- Fix: tiebreaker -- Page 1 SELECT orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC, orderid DESC OFFSET 0 ROWS FETCH NEXT 10 ROWS ONLY; SELECT orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC, orderid DESC OFFSET 10 ROWS FETCH NEXT 10 ROWS ONLY; --------------------------------------------------------------------- -- Example with TOP -- Projecting different columns results in different selection of rows -- Also applicable to other tools such as OFFSET-FETCH, window functions --------------------------------------------------------------------- -- Effective order: orderdate DESC, orderid ASC -- Orders from 2022-04-29: 11057, 11058 SELECT TOP (20) orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC; -- Effective order: orderdate DESC, orderid DESC -- Orders from 2022-04-29: 11059, 11058 SELECT TOP (20) orderid, orderdate FROM Sales.Orders ORDER BY orderdate DESC; -- Fix 1: tiebreaker SELECT TOP (20) orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC, orderid DESC; SELECT TOP (20) orderid, orderdate FROM Sales.Orders ORDER BY orderdate DESC, orderid DESC; -- Fix 2: WITH TIES -- Note: deterministic selection but still nondeterministic presentation order SELECT TOP (20) WITH TIES orderid, orderdate, custid FROM Sales.Orders ORDER BY orderdate DESC; SELECT TOP (20) WITH TIES orderid, orderdate FROM Sales.Orders ORDER BY orderdate DESC; --------------------------------------------------------------------- -- Example with ROW_NUMBER -- Multipe references to named table expression such as CTE can produce different results -- Also applicable to other tools such as TOP, OFFSET_FETCH --------------------------------------------------------------------- -- Row number assignments are different in C1 and C2 WITH C AS ( SELECT *, ROW_NUMBER() OVER(ORDER BY orderdate DESC) AS n FROM Sales.Orders ) SELECT C1.orderid AS orderid1, C1.orderdate AS orderdate1, C1.custid AS custid1, C1.n AS rownum1, C2.orderid AS orderid2, C2.orderdate AS orderdate2, C2.n AS rownum2 FROM C AS C1 INNER JOIN C AS C2 ON C1.orderid = C2.orderid; -- Fix 1: tiebreaker WITH C AS ( SELECT *, ROW_NUMBER() OVER(ORDER BY orderdate DESC, orderid DESC) AS n FROM Sales.Orders ) SELECT C1.orderid AS orderid1, C1.orderdate AS orderdate1, C1.custid AS custid1, C1.n AS rownum1, C2.orderid AS orderid2, C2.orderdate AS orderdate2, C2.n AS rownum2 FROM C AS C1 INNER JOIN C AS C2 ON C1.orderid = C2.orderid; -- Fix 2: use alternative ranking function -- If need ranking to reflect count of more recent orders, use RANK WITH C AS ( SELECT *, RANK() OVER(ORDER BY orderdate DESC) AS n FROM Sales.Orders ) SELECT C1.orderid AS orderid1, C1.orderdate AS orderdate1, C1.custid AS custid1, C1.n AS rownum1, C2.orderid AS orderid2, C2.orderdate AS orderdate2, C2.n AS rownum2 FROM C AS C1 INNER JOIN C AS C2 ON C1.orderid = C2.orderid; -- If need ranking to reflect distinct count of more recent order dates, use DENSE_RANK WITH C AS ( SELECT *, DENSE_RANK() OVER(ORDER BY orderdate DESC) AS n FROM Sales.Orders ) SELECT C1.orderid AS orderid1, C1.orderdate AS orderdate1, C1.custid AS custid1, C1.n AS rownum1, C2.orderid AS orderid2, C2.orderdate AS orderdate2, C2.n AS rownum2 FROM C AS C1 INNER JOIN C AS C2 ON C1.orderid = C2.orderid; --------------------------------------------------------------------- -- Example with running totals --------------------------------------------------------------------- -- Supporting index CREATE INDEX idx_nc_od_oidD_i_fr ON Sales.Orders(orderdate, orderid DESC) INCLUDE(freight); -- Observe orders placed on 2020-07-08 in both result sets get different intermediate running totals SELECT orderid, orderdate, SUM(freight) OVER(ORDER BY orderdate ROWS UNBOUNDED PRECEDING) AS runsumfreight FROM Sales.Orders; SELECT orderid, orderdate, custid, SUM(freight) OVER(ORDER BY orderdate ROWS UNBOUNDED PRECEDING) AS runsumfreight FROM Sales.Orders; DROP INDEX IF EXISTS idx_nc_od_oidD_i_fr ON Sales.Orders; -- Fix 1: tiebreaker SELECT orderid, orderdate, SUM(freight) OVER(ORDER BY orderdate, orderid ROWS UNBOUNDED PRECEDING) AS runsumfreight FROM Sales.Orders; SELECT orderid, orderdate, custid, SUM(freight) OVER(ORDER BY orderdate, orderid ROWS UNBOUNDED PRECEDING) AS runsumfreight FROM Sales.Orders; -- Fix 2: use RANGE instead of ROWS (similar to WITH TIES) -- Note: when optimized with row mode, incurs a performance penalty SELECT orderid, orderdate, SUM(freight) OVER(ORDER BY orderdate RANGE UNBOUNDED PRECEDING) AS runsumfreight FROM Sales.Orders; SELECT orderid, orderdate, custid, SUM(freight) OVER(ORDER BY orderdate RANGE UNBOUNDED PRECEDING) AS runsumfreight FROM Sales.Orders; -- Fix 3: apply preliminary grouping SELECT orderdate, SUM(SUM(freight)) OVER(ORDER BY orderdate ROWS UNBOUNDED PRECEDING) AS runsumfreight FROM Sales.Orders GROUP BY orderdate; -- Here custid is irrelevant since it's an attribute of the order, but could show all customers if relevant SELECT orderdate, SUM(SUM(freight)) OVER(ORDER BY orderdate ROWS UNBOUNDED PRECEDING) AS runsumfreight, STRING_AGG(custid, ',') WITHIN GROUP(ORDER BY custid) AS custs FROM Sales.Orders GROUP BY orderdate; --------------------------------------------------------------------- -- When you don't care about determinism --------------------------------------------------------------------- --------------------------------------------------------------------- -- Row numbers with nondeterministinc order --------------------------------------------------------------------- -- Invalid to omit ORDER BY SELECT orderid, orderdate, custid, ROW_NUMBER() OVER() AS rownum FROM Sales.Orders; -- Invalid to ORDER BY pos SELECT orderid, orderdate, custid, ROW_NUMBER() OVER(ORDER BY 1) AS rownum FROM Sales.Orders; -- Invalid to ORDER BY constant SELECT orderid, orderdate, custid, ROW_NUMBER() OVER(ORDER BY 'no order') AS rownum FROM Sales.Orders; -- Constant folding results in pos/constant SELECT orderid, orderdate, custid, ROW_NUMBER() OVER(ORDER BY 1+0) AS rownum FROM Sales.Orders; SELECT orderid, orderdate, custid, ROW_NUMBER() OVER(ORDER BY 'no' + ' order') AS rownum FROM Sales.Orders; -- No constant folding if error or subquery SELECT orderid, orderdate, custid, ROW_NUMBER() OVER(ORDER BY 1/0) AS rownum FROM Sales.Orders; SELECT orderid, orderdate, custid, ROW_NUMBER() OVER(ORDER BY (SELECT 'no order')) AS rownum FROM Sales.Orders; --------------------------------------------------------------------- -- When you really don't care about determinism and its implications -- Calrify by using ORDER BY (SELECT 'no order') --------------------------------------------------------------------- SELECT TOP (3) orderid, orderdate, custid FROM Sales.Orders ORDER BY (SELECT 'no order'); --------------------------------------------------------------------- -- Nondeterministic functions --------------------------------------------------------------------- --------------------------------------------------------------------- -- Most nondeterministic functions called once per query reference --------------------------------------------------------------------- -- Example with RAND, but same applies to other functions like SYSDATETIME SELECT orderid, RAND() AS rnd FROM Sales.Orders; -- That's why following query doesn't produce random order SELECT orderid FROM Sales.Orders ORDER BY RAND(); -- Exception: NEWID SELECT orderid, NEWID() AS rnd FROM Sales.Orders; -- That's why following query produces random order SELECT orderid FROM Sales.Orders ORDER BY CHECKSUM(NEWID()); --------------------------------------------------------------------- -- Multiple references called separately --------------------------------------------------------------------- -- rnd1 and rnd2 are different SELECT orderid, RAND() AS rnd1, RAND() AS rnd2 FROM Sales.Orders; -- Same applies to other functions like SYSDATETIME, but sometimes more subtle to see difference SELECT orderid, SYSDATETIME() AS dt1, SYSDATETIME() AS dt2 FROM Sales.Orders; -- Try in a loop, turn off graphical execution plan SET NOCOUNT ON; DECLARE @i AS INT = 0; WHILE 1 = 1 BEGIN SET @i += 1; IF EXISTS (SELECT * FROM (SELECT SYSDATETIME() AS dt1, SYSDATETIME() AS dt2) AS D WHERE dt1 <> dt2) BREAK; END; PRINT @i; -- To rely on only one execution, use named table expression with only one reference WITH C AS ( SELECT orderid, RAND() AS rnd, SYSDATETIME() AS dt FROM Sales.Orders ) SELECT orderid, rnd AS rnd1, rnd AS rnd2, dt AS dt1, dt AS dt2 FROM C; GO --------------------------------------------------------------------- -- User Defined Functions --------------------------------------------------------------------- --------------------------------------------------------------------- -- UDF called once per reference and row, unless inlined --------------------------------------------------------------------- DROP FUNCTION IF EXISTS dbo.MySysDateTime; GO CREATE OR ALTER FUNCTION dbo.MySysDateTime() RETURNS DATETIME2 AS BEGIN RETURN SYSDATETIME(); END; GO -- Can see different results of function call in different rows SELECT orderid, dbo.MySysDateTime() AS mydt FROM Sales.Orders; --------------------------------------------------------------------- -- Use of nondeterministic functions in UDF prevents inlining --------------------------------------------------------------------- -- See plan for query above -- Also see inlining property SELECT is_inlineable FROM sys.sql_modules WHERE object_id = OBJECT_ID(N'dbo.MySysDateTime'); -- Performance penalty of not inlining is substantial -- So if possible, avoid nondeterministic functions in UDFs to enable inlining -- If not possible, consider converting to inline TVF -- Test with results discarded SET STATISTICS TIME ON; -- With built-in function, 238 ms SELECT O1.orderid, SYSDATETIME() AS dt FROM Sales.Orders AS O1 CROSS JOIN Sales.Orders AS O2; -- With noninlinable UDF, 5579 ms SELECT O1.orderid, dbo.MySysDateTime() AS dt FROM Sales.Orders AS O1 CROSS JOIN Sales.Orders AS O2; -- With iTVF 259 ms DROP FUNCTION IF EXISTS dbo.MySysDateTime2; GO CREATE OR ALTER FUNCTION dbo.MySysDateTime2() RETURNS TABLE AS RETURN SELECT SYSDATETIME() AS dt; GO SELECT O1.orderid, (SELECT dt FROM dbo.MySysDateTime2()) AS dt FROM Sales.Orders AS O1 CROSS JOIN Sales.Orders AS O2; SET STATISTICS TIME OFF;