Here is the SQL that goes with the demo.
-- Database by Doug
-- Douglas Kline
-- 2/17/2016
-- HAVING - how to limit which groups are shown
USE Northwind
-- you might want to check out
-- GROUP BY part 1 - introduction
-- GROUP BY part 2 - NULL handling and COUNTs
-- GROUP BY part 3 - unique groupings and multi-column groups
-- a simple query from Products, leaving out any JOINs for clarity
SELECT CategoryID,
ProductID,
ProductName,
Unitprice
FROM Products
ORDER BY CategoryID,
ProductID
-- now grouping by Category, and calculating average unit price
SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID
ORDER BY CategoryID
-- now suppose that we'd like to only see
-- "high priced categories"
-- we could maybe do this:
SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID
ORDER BY [average unit price] DESC
-- now the higher priced categories are at the top
-- I might even be able to use the non-ANSI-standard
-- TOP clause like this:
SELECT TOP 5
CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID
ORDER BY [average unit price] DESC
-- but what if I want a cut-off
-- only show categories whose average unitprice is above $30
-- there's a statement for that: HAVING
SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID
HAVING AVG(Unitprice) > 30 -- this is the new piece
ORDER BY [average unit price] DESC
-- the new part is the HAVING clause
-- the HAVING clause is similar to the WHERE clause
-- in that it specifies a logical expression
-- that can be evaluated as true or false
-- however, the logical expression is evaluated
-- for each *group* (in our case, each group represents a category)
-- with the WHERE clause
-- the logical expression is evaluated
-- for each *record* (in our case, each record represents a product)
-- consider this example
SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID
HAVING COUNT(productID) > 2 -- this is the new piece
ORDER BY [average unit price] DESC
-- here we are saying, only show categories
-- that have at least two products in them
-- notice that the HAVING doesn't have to be
-- something in the SELECT clause
-- consider this example
SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID
HAVING MIN(unitprice) > 5 -- this is the new piece
ORDER BY [average unit price] DESC
-- here, only show categories
-- with minimum-priced products greater than $5
-- note that in the last three examples,
-- the average unit price does not change for each category
-- what changes, is which categories are displayed
-- now consider these:
SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID
ORDER BY [average unit price] DESC
SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
WHERE unitsInStock > 0 -- this is the changed part
GROUP BY CategoryID
ORDER BY [average unit price] DESC
-- notice that the average unit prices change
-- and potentially, the order of the rows could change
-- this is because only products with unitsInStock > 0
-- are included in the groups
-- and therefore, the aggregations (AVG, MIN, MAX, COUNT, etc.)
-- change
-- so, here is a good way
-- to think about the order
-- in which the operations are performed
-- WHERE comes first - records are eliminated before the grouping
-- GROUP BY comes next - remaining records are arranged into groups
-- HAVING comes next - groups are eliminated
-- ORDER BY comes last - what's left gets ordered
-- a larger last example
SELECT Suppliers.CompanyName,
SUM(Products.unitprice
* Products.unitsInStock
) AS [Value of Inventory]
FROM Products
JOIN Suppliers ON Products.SupplierID = Suppliers.SupplierID
WHERE Products.CategoryID IN (1, 3)
GROUP BY Suppliers.CompanyName,
Suppliers.SupplierID
HAVING COUNT(ProductID) > 1
ORDER BY [Value of Inventory] DESC
-- in english:
-- show the dollar value of inventory, by supplier
-- only include products in categories 1 and 3
-- only include suppliers that supply more than one product
-- order the suppliers by descending value of inventory
-- a few things to note
-- the GROUP BY has a Suppliers.SupplierID to ensure a unique grouping
-- The WHERE clause could be changed without affecting any other part
-- THE HAVING clause could be changed without affecting any other part
-- (of course the results change...)
-- another interesting note: HAVING is not strictly needed
-- you can accomplish a HAVING using a subquery
-- going back to my first example
SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID
HAVING AVG(Unitprice) > 30
ORDER BY [average unit price] DESC
-- could equivalently be done
-- using a subquery in the FROM clause
-- like this:
SELECT CategoryID,
[average unit price]
FROM (SELECT CategoryID,
AVG(Unitprice) AS [average unit price]
FROM Products
GROUP BY CategoryID) [CategorySubquery]
WHERE [average unit price] > 30
ORDER BY [average unit price] DESC
-- but the HAVING clause is much clearer
-- especially with situations like one-to-many aggregation joins
-- like the one above:
SELECT Suppliers.CompanyName,
SUM(Products.unitprice
* Products.unitsInStock
) AS [Value of Inventory]
FROM Products
JOIN Suppliers ON Products.SupplierID = Suppliers.SupplierID
JOIN (SELECT SupplierID,
COUNT(ProductID) AS [product count]
FROM Products
WHERE Products.CategoryID IN (1,3)
GROUP BY SupplierID
) [SupplierGroup]
ON Suppliers.SupplierID = SupplierGroup.SupplierID
WHERE [SupplierGroup].[product count] > 1
GROUP BY Suppliers.CompanyName,
Suppliers.SupplierID
ORDER BY [Value of Inventory] DESC
-- compared to:
SELECT Suppliers.CompanyName,
SUM(Products.unitprice
* Products.unitsInStock
) AS [Value of Inventory]
FROM Products
JOIN Suppliers ON Products.SupplierID = Suppliers.SupplierID
WHERE Products.CategoryID IN (1, 3)
GROUP BY Suppliers.CompanyName,
Suppliers.SupplierID
HAVING COUNT(ProductID) > 1
ORDER BY [Value of Inventory] DESC
-- thanks for watching and / or reading
-- Database by Doug
-- Douglas Kline
-- 2/17/2016
-- HAVING - how to limit which groups are shown
No comments:
Post a Comment