-- Database by Doug
-- Douglas Kline
-- 10/10/2017
-- RANKing functions, Part 1: OVER
USE Northwind
-- here's a simple SELECT statement from the Products table
SELECT ProductName,
UnitPrice
FROM Products
ORDER BY UnitPrice DESC
-- this shows that the highest priced product is Cote de Blaye, productID 38
-- but sometimes the *relative* price is more important than the actual price
-- in other words, we want to know how products *rank*, based on price
-- So, here's how you would do that in SQL Server:
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank]
FROM Products
ORDER BY UnitPrice DESC
-- notice that the highest priced product is ranked 1
-- the second highest priced product has a rank of 2
-- let's look at the syntax
-- RANK() is a function that takes no parameters
-- but it requires an OVER clause
-- so this doesn't work, giving an error
SELECT ProductName,
UnitPrice,
RANK() -- no over
FROM Products
ORDER BY UnitPrice DESC
-- the OVER clause tells the RANK how to do its ranking
-- in our example, the phrase (ORDER BY unitprice DESC) means
-- "When calculating RANK, rank based on price, descending."
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank]
FROM Products
ORDER BY UnitPrice DESC
-- we could also choose to rank the products by productname:
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY ProductName ASC) AS [name rank]
FROM Products
ORDER BY UnitPrice DESC
-- note that I didn't change the record ordering
-- so its hard to tell if the rank is correct
-- let's fix that:
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY ProductName ASC) AS [name rank]
FROM Products
ORDER BY ProductName ASC
-- so now I can visually verify the ranking
-- but, this makes demonstrates something important:
-- the ORDER BY in an OVER clause only affects the ranking function
-- it is independent of the ORDER BY for the SELECT statement
-- in other words, you can rank in one way, and show records another
-- for example, you might want to show products alphabetically,
-- but also show their rank in terms of price:
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank]
FROM Products
ORDER BY ProductName ASC
-- now consider this
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank]
FROM Products
ORDER BY unitprice DESC
-- notice that Ipoh Coffee is ranked 9th overall
-- but is also the 2nd highest priced product in CategoryID 1
-- so, I could do this:
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank in category]
FROM Products
WHERE CategoryID = 1
UNION
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank in category]
FROM Products
WHERE CategoryID = 2
ORDER BY unitprice DESC
-- etc.
-- but that's a pain
-- and is really slow at scale
-- let's use the OVER clause to tell RANK to *rank by category*
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank overall],
RANK() OVER (PARTITION BY CategoryID ORDER BY unitprice DESC) AS [price rank in category]
FROM Products
ORDER BY unitprice DESC
-- here it is formatted to show the difference:
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER ( ORDER BY unitprice DESC) AS [price rank overall],
RANK() OVER (PARTITION BY CategoryID ORDER BY unitprice DESC) AS [price rank in category]
FROM Products
ORDER BY unitprice DESC
-- here, we've calculated rank in two different ways
-- the PARTITION BY says: rank against other products with the same CategoryID
-- now let's do it again, but rank products
-- against other products from the same supplier
-- and to prove that the rankings are true,
-- we can order the records in different ways:
SELECT ProductName,
CategoryID,
SupplierID,
UnitPrice,
RANK() OVER ( ORDER BY unitprice DESC) AS [price rank overall],
RANK() OVER (PARTITION BY CategoryID ORDER BY unitprice DESC) AS [price rank in category],
RANK() OVER (PARTITION BY SupplierID ORDER BY unitprice DESC) AS [price rank by supplier]
FROM Products
--ORDER BY unitprice DESC
--ORDER BY CategoryID, unitprice DESC
ORDER BY SupplierID, unitprice DESC
-- one final example
-- let's rank by dollar value in inventory, within category
SELECT ProductName,
CategoryID,
unitprice * UnitsInStock AS [dollar value in inventory],
RANK() OVER (PARTITION BY CategoryID
ORDER BY unitprice * UnitsInStock DESC,
ProductID ASC) AS [value rank by category]
FROM Products
ORDER BY CategoryID,
[dollar value in inventory] DESC
-- notice that the ORDER BY in the OVER can be an expression
-- in the future videos, I'll cover ROW_NUMBER(), DENSE_RANK(), and NTILE(n)
-- they are all ranking functions
-- and they all use the OVER clause in exactly the same way
-- but they calculate a "rank" in various ways
-- Database by Doug
-- Douglas Kline
-- 10/10/2017
-- RANKing functions, Part 1: OVER
Thursday, October 12, 2017
SQL Ranking Functions, Part 1: the OVER clause
Subscribe to:
Post Comments (Atom)
Really helped!
ReplyDelete