/* Title: SQL Queries by Ira Sharenow Author: Ira Sharenow Creation Date: February 6, 2023 In order to demonstrate a wide variety of SQL query techniques, I created some data in Excel CSV format. I then read the data into SQL Server using a database named web2023. I explored T-SQL by writing a large number of queries. Below I list the tables. Then I display much of the data. Then I write more complex queries. Often I display some or all of the output. Sometimes I solve a problem in more than one way, such as writing a subquery and also using window functions. The tables 1. employees 2. products 3. customers 4. orderDetails 5. orders */ -- Displaying the data USE web2023; -- Query 1A SELECT * FROM employees; /* empID firstName lastName 1 Al Jones 2 Barb Smith 3 Carol Johnson 4 Dave Adams */ -- Query 1B SELECT * FROM products; /* prodID product 101 baseball 102 basketball 103 soccer ball 104 football */ -- Query 1C SELECT * FROM customers; /* custid name Country 51 Ellen USA 52 Fran USA 53 George USA 54 Helen USA 55 Ira USA 56 John Canada 57 Jack Canada 58 Kim Canada 59 James Mexico 60 Lucy Mexico 61 Mary Mexico */ -- Query 1D SELECT * FROM orderDetails; /* Sometimes a customer bought multiple items in the same order 1020 rows of output orderid productid unitprice qty 1001 101 10.00 9 1002 101 10.00 10 1003 101 10.00 12 1004 101 10.00 15 1005 101 10.00 15 1006 101 10.00 10 1007 101 10.00 14 1008 101 10.00 13 1009 101 10.00 5 1010 101 10.00 14 1001 102 20.00 3 1002 102 20.00 9 */ -- Query 1E SELECT * FROM orders; /* 1000 rows of output orderid custid empid orderdate shipcountry 1001 60 1 2021-04-14 USA 1002 59 1 2020-07-04 USA 1003 52 1 2021-09-30 USA 1004 51 2 2019-09-12 USA 1005 54 4 2019-11-19 USA 1006 60 3 2019-02-10 USA 1007 58 3 2019-10-08 USA ... 1599 56 1 2019-11-21 Canada 1600 56 1 2021-01-07 Canada 1601 55 3 2021-07-28 Mexico 1602 51 4 2021-03-17 Mexico 1603 58 2 2019-06-22 Mexico */ -- Query 2 -- Numbers of customers -- Answer: 11 SELECT COUNT(custid) AS customerCount FROM customers; -- Query 3 -- Number of customers who have placed orders -- Answer: 10 SELECT COUNT(DISTINCT c.custid) AS customerCount FROM customers AS c LEFT JOIN orders AS o ON c.custid = o.custid WHERE o.custid IS NOT NULL; -- Query 4 -- ID and name of customer who has not yet ordered -- Answer: Mary SELECT c.custid, c.name FROM customers AS c LEFT JOIN orders AS o ON c.custid = o.custid WHERE o.custid IS NULL; -- Query 5 -- Number of orders by year. -- Answer: 2019 331 -- 2020 315 -- 2021 354 SELECT YEAR(orderdate) AS year, COUNT(DISTINCT orderid) as totalSales FROM orders GROUP BY YEAR(orderdate); -- Query 6A -- Number of orders by year and ship country. -- Note that there was some missing data. SELECT shipcountry, YEAR(orderdate) AS year, COUNT(DISTINCT orderid) as totalSales FROM orders GROUP BY shipcountry, YEAR(orderdate); /* shipcountry year totalSales NULL 2019 2 Canada 2019 32 Mexico 2019 142 USA 2019 155 NULL 2020 1 Canada 2020 32 Mexico 2020 127 USA 2020 155 NULL 2021 2 Canada 2021 36 Mexico 2021 131 USA 2021 185 */ -- Query 6B -- Number of orders by year and ship country. Results pivoted so each year has its own column -- Nulls placed last WITH first AS ( SELECT shipcountry, YEAR(orderdate) AS year, COUNT(DISTINCT orderid) as totalSales FROM orders GROUP BY shipcountry, YEAR(orderdate) ) SELECT shipcountry, MAX(CASE WHEN year = 2019 THEN totalSales END) AS [2019], MAX(CASE WHEN year = 2020 THEN totalSales END) AS [2020], MAX(CASE WHEN year = 2021 THEN totalSales END) AS [2021] FROM first GROUP BY shipcountry ORDER BY CASE WHEN shipcountry IS NULL THEN 2 ELSE 1 END, shipcountry; -- Query 6C -- Number of orders by year and ship country. Results pivoted so each year has its own column -- Using T-SQL pivot operator -- Nulls placed last WITH first AS ( SELECT shipcountry, YEAR(orderdate) AS year, COUNT( orderid) as totalSales FROM orders GROUP BY shipcountry, YEAR(orderdate) ) SELECT shipcountry, [2019], [2020], [2021] FROM (SELECT shipcountry, year, totalSales FROM first) AS D PIVOT(MAX(totalSales) FOR year IN ([2019], [2020], [2021])) AS P ORDER BY CASE WHEN shipcountry IS NULL THEN 2 ELSE 1 END, shipcountry; -- Query 6D -- Number of orders by year and ship country. All 4 groupings in one query SELECT shipcountry, YEAR(orderdate) AS year, COUNT( orderid) as totalSales FROM orders GROUP BY GROUPING SETS ( ((shipcountry), YEAR(orderdate)), (shipcountry), (YEAR(orderdate)), () ); -- Query 6E -- Number of orders by year and ship country. All 4 groupings in one query using CUBE syntax SELECT shipcountry, YEAR(orderdate) AS year, COUNT( orderid) as totalSales FROM orders GROUP BY CUBE (shipcountry, YEAR(orderdate)); -- Query 7A -- Top 5 orders by qty. Break ties by selecting most recent SELECT TOP(5) od.orderid, od.productid, od.unitprice, od.qty, o.orderDate FROM orderDetails AS od INNER JOIN orders AS o ON od.orderid = o.orderid ORDER BY qty DESC, orderDate DESC; /* orderid productid unitprice qty orderDate 1658 104 40.00 15 2021-12-16 1618 103 30.00 15 2021-11-12 1106 102 20.00 15 2021-11-02 1383 104 40.00 15 2021-10-09 1257 101 10.00 15 2021-10-06 */ -- Query 7B -- Top 5 orders by qty. Break ties by selecting most recent. Now add customer and employee SELECT TOP(5) c.name AS custName, e.firstname AS empFirst, e.lastname AS empLast, od.orderid, productid, unitprice, qty, o.orderDate FROM orderDetails AS od INNER JOIN orders AS o ON od.orderid = o.orderid INNER JOIN customers AS c on o.custid = c.custid INNER JOIN employees AS e ON o.empid = e.empid ORDER BY qty DESC, orderDate DESC; /* custName empFirst empLast orderid productid unitprice qty orderDate Helen Barb Smith 1658 104 40.00 15 2021-12-16 John Al Jones 1618 103 30.00 15 2021-11-12 Kim Barb Smith 1106 102 20.00 15 2021-11-02 James Al Jones 1383 104 40.00 15 2021-10-09 Lucy Al Jones 1257 101 10.00 15 2021-10-06 */ -- Query 7C -- Top 5 orders by qty. Break ties by selecting most recent. Now add customer and employee -- Alternative: use OFFSETFETCH SELECT c.name AS custName, e.firstname AS empFirst, e.lastname AS empLast, od.orderid, productid, unitprice, qty, o.orderDate FROM orderDetails AS od INNER JOIN orders AS o ON od.orderid = o.orderid INNER JOIN customers AS c on o.custid = c.custid INNER JOIN employees AS e ON o.empid = e.empid ORDER BY qty DESC, orderDate DESC OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY; -- Query 7D -- Top 5 orders by qty. Break ties by selecting most recent. Now add customer and employee -- Alternative: use Window functions WITH first AS ( SELECT c.name AS custName, e.firstname AS empFirst, e.lastname AS empLast, od.orderid, productid, unitprice, qty, o.orderDate, ROW_NUMBER() OVER(ORDER BY qty DESC, orderDate DESC) AS rn FROM orderDetails AS od INNER JOIN orders AS o ON od.orderid = o.orderid INNER JOIN customers AS c on o.custid = c.custid INNER JOIN employees AS e ON o.empid = e.empid ) SELECT * FROM first WHERE rn <= 5 ORDER BY qty DESC, orderDate DESC; -- Query 8 -- Skip top 200 orders by qty and now get the orders that rank 201-210. Break ties by selecting most recent. SELECT c.name AS custName, e.firstname AS empFirst, e.lastname AS empLast, od.orderid, productid, unitprice, qty, o.orderDate FROM orderDetails AS od INNER JOIN orders AS o ON od.orderid = o.orderid INNER JOIN customers AS c on o.custid = c.custid INNER JOIN employees AS e ON o.empid = e.empid ORDER BY qty DESC, orderDate DESC OFFSET 200 ROWS FETCH NEXT 10 ROWS ONLY; /* custName empFirst empLast orderid productid unitprice qty orderDate Lucy Barb Smith 1700 103 30.00 12 2021-03-01 Kim Carol Johnson 1918 103 30.00 12 2021-02-19 Ellen Carol Johnson 1079 102 20.00 12 2021-02-18 Lucy Barb Smith 1763 102 20.00 12 2021-01-11 Kim Barb Smith 1285 101 10.00 12 2021-01-03 Ellen Barb Smith 1476 103 30.00 12 2020-12-28 Lucy Al Jones 1771 102 20.00 12 2020-12-01 John Carol Johnson 1842 102 20.00 12 2020-10-30 Fran Carol Johnson 1395 104 40.00 12 2020-10-21 George Carol Johnson 1808 104 40.00 12 2020-10-15 */ -- Query 9 -- GET names of customers with at least 100 orders SELECT c.custid, c.name AS custName, COUNT(orderid) AS numorders FROM orders AS o INNER JOIN customers AS c ON o.custid = c.custid GROUP BY c.custid, c.name HAVING COUNT(orderid) >= 100 ORDER BY numorders DESC; -- Query 10 -- Get basic data on all sales that were 1, 2, 3, 4, 5 days before December 25, 2021 -- case when 1 then 'one day early' etc SELECT o.orderid, o.custid, o.empid, o.orderdate, o.shipcountry, CASE WHEN DATEDIFF(day, o.orderdate, '20211225') = 1 THEN '1 day before' WHEN DATEDIFF(day, o.orderdate, '20211225') = 2 THEN '2 days before' WHEN DATEDIFF(day, o.orderdate, '20211225') = 3 THEN '3 days before' WHEN DATEDIFF(day, o.orderdate, '20211225') = 4 THEN '4 days before' WHEN DATEDIFF(day, o.orderdate, '20211225') = 5 THEN '5 days before' END AS daysBefore FROM orders AS o WHERE DATEDIFF(day, o.orderdate, '20211225') <= 5 AND DATEDIFF(day, orderdate, '20211225') > 0 ORDER BY o.orderdate DESC; -- Query 11 -- orders placed on last days of months in 2021 SELECT o.orderid, o.custid, o.empid, o.orderdate, o.shipcountry FROM orders AS o WHERE o.orderdate = EOMONTH(o.orderdate) AND o.orderdate >= '20210101' AND o.orderdate < '20220101' ORDER BY o.orderdate, o.orderid; -- Query 12 -- Get max order as measured by value. Note a sale may appear in several rows if multiple items were purchased WITH first AS ( SELECT orderid, SUM(unitprice*qty) AS totalSale FROM orderDetails GROUP BY orderid ) SELECT MAX(totalSale) AS maxSale FROM first AS f; -- Query 13 -- Get max order for each customer as measured by value. WITH first AS ( SELECT DISTINCT o.custid, c.name AS custName, od.orderid, SUM(unitprice*qty) OVER(PARTITION BY o.custid, od.orderid) AS totalSale, ROW_NUMBER() OVER(PARTITION BY o.custid, od.orderid ORDER BY unitprice*qty DESC) AS rnk FROM orderDetails AS od INNER JOIN orders AS o ON od.orderid = o.orderid INNER JOIN customers AS c ON o.custid = c.custid WHERE o.custid IS NOT NULL ) SELECT custid, custName, MAX(totalSale) maxSale FROM first WHERE rnk = 1 GROUP BY custid, custName ORDER BY custid; -- Query 14 -- For each customer compute the amount the order is out of the total for that customer -- Use subquery SELECT c1.custid, od1.orderid, od1.productid, od1.unitprice*od1.qty AS saleValue, CAST(100. * unitprice*od1.qty / (SELECT SUM(od2.unitprice*od2.qty) FROM orderDetails AS od2 INNER JOIN orders AS o2 ON od2.orderid = o2.orderid INNER JOIN customers AS c2 ON o2.custid = c2.custid WHERE c2.custid = c1.custid) AS NUMERIC(5,2)) AS pct FROM orderdetails AS od1 INNER JOIN orders AS o1 ON od1.orderid = o1.orderid INNER JOIN customers AS c1 ON o1.custid = c1.custid ORDER BY o1.custid; -- Query 15A -- Customers from Mexico who placed orders -- Recall that Mary is from Mexico but she did not place any orders SELECT c.name FROM customers AS c WHERE c.country = 'Mexico' AND c.custid IN (SELECT o.custid FROM orders AS o ) -- Query 15B -- Using EXIST SELECT c.name FROM customers AS c WHERE c.country = 'Mexico' AND EXISTS (SELECT * FROM orders AS o WHERE o.custid = c.custid); /* name James Lucy */ -- Query 16A -- Get the running totals for sales values for all orders when ordered by date with -- orderid and productID as tie breakers -- Solution using subqueries SELECT o.custid, o.orderid, od.productid, o.orderdate, od.unitprice*od.qty AS total, (SELECT SUM(od2.unitprice*od2.qty) FROM orderDetails AS od2 INNER JOIN orders AS o2 ON od2.orderid = o2.orderid WHERE o2.orderdate <= o.orderdate ) AS runningTotal FROM orders AS o INNER JOIN orderDetails AS od ON o.orderid = od.orderid ORDER BY o.orderdate, o.orderid, od.productid; -- Query 16B -- Solution using window functions SELECT o.custid, o.orderid, od.productid, o.orderdate, od.unitprice*od.qty AS total, SUM(od.unitprice*od.qty) OVER(ORDER BY o.orderdate, o.orderid, od.productid ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS runTotal FROM orders AS o INNER JOIN orderDetails AS od ON o.orderid = od.orderid ORDER BY o.orderdate, o.orderid, od.productid; -- Query 16C -- Running totals for each customer -- Solution using window functions -- Do not include data where custid is missing SELECT o.custid, o.orderid, od.productid, o.orderdate, od.unitprice*od.qty AS total, SUM(od.unitprice*od.qty) OVER(PARTITION BY o.custid ORDER BY o.orderdate, o.orderid, od.productid ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS runTotal FROM orders AS o INNER JOIN orderDetails AS od ON o.orderid = od.orderid WHERE o.custid IS NOT NULL ORDER BY o.custid, o.orderdate, o.orderid, od.productid; -- Query 17 SELECT DISTINCT o.custid, c.name FROM orders AS o INNER JOIN customers AS c ON o.custid = c.custid WHERE o.custid NOT IN ( SELECT o2.custid FROM orders AS o2 WHERE o2.orderdate >= '20211201' AND o2.orderdate <= '20211231' ) ORDER BY c.name; /* custid name 58 Kim */ -- Query 18 -- Number of days since previous order SELECT orderdate, empid, custid,orderid, LAG(orderdate) OVER(PARTITION BY custid ORDER BY orderdate) AS prevorderdate, DATEDIFF(day, LAG(orderdate) OVER(PARTITION BY custid ORDER BY orderdate), orderdate) AS day_diff FROM Orders WHERE custid IS NOT NULL ORDER BY custid, orderdate; -- Query 19A -- Previous and next order using subqueries -- Please note that subqueries are much slower than window functions SELECT orderdate, empid, custid,orderid, (SELECT MAX(o2.orderid) FROM orders AS O2 WHERE o2.orderid < o1.orderid AND o2.custid = o1.custid AND o2.custid IS NOT NULL) AS prevorderid, (SELECT MIN(o3.orderid) FROM orders AS o3 WHERE o3.orderid > o1.orderid AND o3.custid = o1.custid AND o3.custid IS NOT NULL) AS nextorderid FROM Orders AS o1 WHERE o1.custid IS NOT NULL ORDER BY o1.custid, o1.orderid; -- Query 19B -- Previous and next order using window functions SELECT orderdate, empid, custid,orderid, LAG(orderid) OVER(PARTITION BY custid ORDER BY orderid) AS prevorderid, LEAD(orderid) OVER(PARTITION BY custid ORDER BY orderid) AS nextorderid FROM Orders WHERE custid IS NOT NULL ORDER BY custid, orderid; -- Query 20 -- Get the employee name for orders missing custid or shipcountry SELECT o.orderid, e.firstName + ' ' + e.lastName AS employeeName FROM orders AS o INNER JOIN employees AS e ON o.empid = e.empID WHERE o.custid IS NULL OR o.shipcountry IS NULL ORDER BY o.orderid; -- Query 21 -- The details for every order -- Full outer join SELECT o.orderid, o.custid, o.empid, od.productid, o.orderdate, o.shipcountry, c.name AS custName, c.Country AS custCountry, e.firstName + ' ' + e.lastName AS empName, p.product, p.price, od.qty FROM orders AS o FULL OUTER JOIN orderDetails AS od ON o.orderid = od.orderid FULL OUTER JOIN customers AS c ON o.custid = c.custid FULL OUTER JOIN products AS p ON od.productid = p.prodID FULL OUTER JOIN employees AS e ON o.empid = e.empID; -- Query 22 -- number of balls sold by type SELECT p.prodID, p.product, SUM(qty) AS numSold FROM orderDetails AS od INNER JOIN products AS p ON od.productid = p.prodID GROUP BY p.prodID, p.product