-- Database by Doug
-- Douglas Kline
-- 10/10/2017
-- RANKing functions, Part 1: OVER
USE Northwind
-- here's a simple SELECT statement from the Products table
SELECT ProductName,
UnitPrice
FROM Products
ORDER BY UnitPrice DESC
-- this shows that the highest priced product is Cote de Blaye, productID 38
-- but sometimes the *relative* price is more important than the actual price
-- in other words, we want to know how products *rank*, based on price
-- So, here's how you would do that in SQL Server:
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank]
FROM Products
ORDER BY UnitPrice DESC
-- notice that the highest priced product is ranked 1
-- the second highest priced product has a rank of 2
-- let's look at the syntax
-- RANK() is a function that takes no parameters
-- but it requires an OVER clause
-- so this doesn't work, giving an error
SELECT ProductName,
UnitPrice,
RANK() -- no over
FROM Products
ORDER BY UnitPrice DESC
-- the OVER clause tells the RANK how to do its ranking
-- in our example, the phrase (ORDER BY unitprice DESC) means
-- "When calculating RANK, rank based on price, descending."
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank]
FROM Products
ORDER BY UnitPrice DESC
-- we could also choose to rank the products by productname:
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY ProductName ASC) AS [name rank]
FROM Products
ORDER BY UnitPrice DESC
-- note that I didn't change the record ordering
-- so its hard to tell if the rank is correct
-- let's fix that:
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY ProductName ASC) AS [name rank]
FROM Products
ORDER BY ProductName ASC
-- so now I can visually verify the ranking
-- but, this makes demonstrates something important:
-- the ORDER BY in an OVER clause only affects the ranking function
-- it is independent of the ORDER BY for the SELECT statement
-- in other words, you can rank in one way, and show records another
-- for example, you might want to show products alphabetically,
-- but also show their rank in terms of price:
SELECT ProductName,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank]
FROM Products
ORDER BY ProductName ASC
-- now consider this
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank]
FROM Products
ORDER BY unitprice DESC
-- notice that Ipoh Coffee is ranked 9th overall
-- but is also the 2nd highest priced product in CategoryID 1
-- so, I could do this:
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank in category]
FROM Products
WHERE CategoryID = 1
UNION
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank in category]
FROM Products
WHERE CategoryID = 2
ORDER BY unitprice DESC
-- etc.
-- but that's a pain
-- and is really slow at scale
-- let's use the OVER clause to tell RANK to *rank by category*
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER (ORDER BY unitprice DESC) AS [price rank overall],
RANK() OVER (PARTITION BY CategoryID ORDER BY unitprice DESC) AS [price rank in category]
FROM Products
ORDER BY unitprice DESC
-- here it is formatted to show the difference:
SELECT ProductName,
CategoryID,
UnitPrice,
RANK() OVER ( ORDER BY unitprice DESC) AS [price rank overall],
RANK() OVER (PARTITION BY CategoryID ORDER BY unitprice DESC) AS [price rank in category]
FROM Products
ORDER BY unitprice DESC
-- here, we've calculated rank in two different ways
-- the PARTITION BY says: rank against other products with the same CategoryID
-- now let's do it again, but rank products
-- against other products from the same supplier
-- and to prove that the rankings are true,
-- we can order the records in different ways:
SELECT ProductName,
CategoryID,
SupplierID,
UnitPrice,
RANK() OVER ( ORDER BY unitprice DESC) AS [price rank overall],
RANK() OVER (PARTITION BY CategoryID ORDER BY unitprice DESC) AS [price rank in category],
RANK() OVER (PARTITION BY SupplierID ORDER BY unitprice DESC) AS [price rank by supplier]
FROM Products
--ORDER BY unitprice DESC
--ORDER BY CategoryID, unitprice DESC
ORDER BY SupplierID, unitprice DESC
-- one final example
-- let's rank by dollar value in inventory, within category
SELECT ProductName,
CategoryID,
unitprice * UnitsInStock AS [dollar value in inventory],
RANK() OVER (PARTITION BY CategoryID
ORDER BY unitprice * UnitsInStock DESC,
ProductID ASC) AS [value rank by category]
FROM Products
ORDER BY CategoryID,
[dollar value in inventory] DESC
-- notice that the ORDER BY in the OVER can be an expression
-- in the future videos, I'll cover ROW_NUMBER(), DENSE_RANK(), and NTILE(n)
-- they are all ranking functions
-- and they all use the OVER clause in exactly the same way
-- but they calculate a "rank" in various ways
-- Database by Doug
-- Douglas Kline
-- 10/10/2017
-- RANKing functions, Part 1: OVER
Thursday, October 12, 2017
SQL Ranking Functions, Part 1: the OVER clause
Quick! What's the difference between RANK, DENSE_RANK, and ROW_NUMBER?
-- Database by Doug
-- Douglas Kline
-- 10/12/2017
-- Quick! What's the difference between RANK, DENSE_RANK, and ROW_NUMBER?
-- in short, they are only different when there are ties...
-- here's a table that will help show the difference
-- between the ranking functions
-- note the [Score] column,
-- it will be the basis of the ranking
SELECT [Name],
[Score]
FROM (VALUES ('A', 100),
('B', 101),
('C', 101),
('D', 101),
('E', 102),
('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC
-- first, ROW_NUMBER
SELECT [Name],
[Score],
ROW_NUMBER() OVER (ORDER BY [Score]) AS [row_number]
FROM (VALUES ('A', 100),
('B', 101),
('C', 101),
('D', 101),
('E', 102),
('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC
-- row number gives every item a unique number
-- it does NOT recognize ties
-- no numbers are repeated, no numbers are skipped
-- next, RANK()
SELECT [Name],
[Score],
ROW_NUMBER() OVER (ORDER BY [Score]) AS [row_number],
RANK() OVER (ORDER BY [Score]) AS [rank]
FROM (VALUES ('A', 100),
('B', 101),
('C', 101),
('D', 101),
('E', 102),
('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC
-- RANK() recognizes "ties" by repeating the same rank value
-- but then skipping to the next row_number
-- note the skip from 2,2,2 to rank of 5
-- now look at dense rank
SELECT [Name],
[Score],
ROW_NUMBER() OVER (ORDER BY [Score]) AS [row_number],
RANK() OVER (ORDER BY [Score]) AS [rank],
DENSE_RANK() OVER (ORDER BY [Score]) AS [dense_rank]
FROM (VALUES ('A', 100),
('B', 101),
('C', 101),
('D', 101),
('E', 102),
('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC
-- dense_rank handles ties
-- but does not "skip" ranks
-- note the 2, 2, 2 then 3
-- row_number, rank, and dense_rank
-- are the SAME when there are no ties
-- to prove it, see this, where the Scores are all unique
SELECT [Name],
[Score],
ROW_NUMBER() OVER (ORDER BY [Score]) AS [row_number],
RANK() OVER (ORDER BY [Score]) AS [rank],
DENSE_RANK() OVER (ORDER BY [Score]) AS [dense_rank]
FROM (VALUES ('A', 100),
('B', 101.0), -- added a decimal point
('C', 101.1), -- to break the ties
('D', 101.2),
('E', 102),
('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC
-- Database by Doug
-- Douglas Kline
-- 10/12/2017
-- Quick! What's the difference between RANK, DENSE_RANK, and ROW_NUMBER?
Wednesday, March 15, 2017
One Bit per Atom
IBM has, in the laboratory, been able to store a single bit on a single atom.
http://thehackernews.com/2017/ 03/atom-data-storage.html
http://thehackernews.com/2017/
Monday, March 13, 2017
Ordering mixed Alpha and Digit Characters
Suppose you have a VARCHAR column that has a mix of alpha and digit characters. In other words, the values represent different types across rows.
Like this:
SELECT mixedColumn
FROM (VALUES ('fred'),
(CONVERT (VARCHAR, 1)),
(CONVERT (VARCHAR, 3)),
('george'),
(CONVERT (VARCHAR, 7)),
('ginger')
) AS myTable(mixedColumn)
So how does this get sorted, by default?
SELECT mixedAlphaColumn
FROM (VALUES ('fred'),
(CONVERT (VARCHAR, 1)),
(CONVERT (VARCHAR, 3)),
('george'),
(CONVERT (VARCHAR, 7)),
('ginger')
) AS myTable(mixedAlphaColumn)
ORDER BY mixedAlphaColumn
Which produces:
This gets ordered based on its ASCII code value. Since all ASCII digit characters come before alpha characters, digits will come before alpha when using ORDER BY. You can see this with this code:
SELECT mixedColumn,
ASCII(LEFT(mixedColumn, 1)) AS [ASCII Value]
FROM (VALUES ('fred'),
(CONVERT (VARCHAR, 1)),
(CONVERT (VARCHAR, 3)),
('george'),
(CONVERT (VARCHAR, 7)),
('ginger')
) AS myTable(mixedColumn)
ORDER BY mixedColumn
Which produces:
But what if we want to have the digit characters come after the alpha characters?
We could do this:
SELECT mixedColumn,
ASCII(LEFT(mixedColumn, 1)) AS [ASCII Value]
FROM (VALUES ('fred'),
(CONVERT (VARCHAR, 1)),
(CONVERT (VARCHAR, 3)),
('george'),
(CONVERT (VARCHAR, 7)),
('ginger')
) AS myTable(mixedColumn)
ORDER BY mixedColumn DESC
Which produces this:
But what if we want the digits ordered ascending, and the alpha ordered ascending, but the alpha before the digits?
Here's the trick - the ISNUMERIC function.
SELECT mixedColumn,
ISNUMERIC(mixedColumn) AS [isNumeric]
FROM (VALUES ('fred'),
(CONVERT (VARCHAR, 1)),
(CONVERT (VARCHAR, 3)),
('george'),
(CONVERT (VARCHAR, 7)),
('ginger')
) AS myTable(mixedColumn)
ORDER BY ISNUMERIC(mixedColumn),
mixedColumn
Which produces:
And of course, we don't need to display the isNumeric column:
SELECT mixedColumn
FROM (VALUES ('fred'),
(CONVERT (VARCHAR, 1)),
(CONVERT (VARCHAR, 3)),
('george'),
(CONVERT (VARCHAR, 7)),
('ginger')
) AS myTable(mixedColumn)
ORDER BY ISNUMERIC(mixedColumn),
mixedColumn
Which produces:
The ISNUMERIC function became available in SQL 2008. However, be careful, since certain characters that you might expect to be alpha are actually interpreted as digit, e.g. periods, commas, plus sign, etc.
Subscribe to:
Posts (Atom)





