Thursday, October 12, 2017

Quick! What's the difference between RANK, DENSE_RANK, and ROW_NUMBER?

-- Database by Doug
-- Douglas Kline
-- 10/12/2017
-- Quick! What's the difference between RANK, DENSE_RANK, and ROW_NUMBER?

-- in short, they are only different when there are ties...

-- here's a table that will help show the difference
-- between the ranking functions

-- note the [Score] column, 
-- it will be the basis of the ranking

SELECT   [Name],
         [Score]
FROM     (VALUES  ('A', 100),
                  ('B', 101), 
                  ('C', 101), 
                  ('D', 101), 
                  ('E', 102), 
                  ('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC

-- first, ROW_NUMBER

SELECT   [Name],
         [Score],
         ROW_NUMBER() OVER (ORDER BY [Score]) AS [row_number]
FROM     (VALUES  ('A', 100),
                  ('B', 101), 
                  ('C', 101), 
                  ('D', 101), 
                  ('E', 102), 
                  ('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC

-- row number gives every item a unique number
-- it does NOT recognize ties
-- no numbers are repeated, no numbers are skipped

-- next, RANK()

SELECT   [Name],
         [Score],
         ROW_NUMBER() OVER (ORDER BY [Score]) AS [row_number],
         RANK()       OVER (ORDER BY [Score]) AS [rank]
FROM     (VALUES  ('A', 100),
                  ('B', 101), 
                  ('C', 101), 
                  ('D', 101), 
                  ('E', 102), 
                  ('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC

-- RANK() recognizes "ties" by repeating the same rank value
-- but then skipping to the next row_number
-- note the skip from 2,2,2 to rank of 5

-- now look at dense rank

SELECT   [Name],
         [Score],
         ROW_NUMBER() OVER (ORDER BY [Score]) AS [row_number],
         RANK()       OVER (ORDER BY [Score]) AS [rank],
         DENSE_RANK() OVER (ORDER BY [Score]) AS [dense_rank]
FROM     (VALUES  ('A', 100),
                  ('B', 101), 
                  ('C', 101), 
                  ('D', 101), 
                  ('E', 102), 
                  ('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC

-- dense_rank handles ties
-- but does not "skip" ranks
-- note the 2, 2, 2 then 3

-- row_number, rank, and dense_rank 
-- are the SAME when there are no ties

-- to prove it, see this, where the Scores are all unique

SELECT   [Name],
         [Score],
         ROW_NUMBER() OVER (ORDER BY [Score]) AS [row_number],
         RANK()       OVER (ORDER BY [Score]) AS [rank],
         DENSE_RANK() OVER (ORDER BY [Score]) AS [dense_rank]
FROM     (VALUES  ('A', 100),
                  ('B', 101.0), -- added a decimal point 
                  ('C', 101.1), -- to break the ties
                  ('D', 101.2), 
                  ('E', 102), 
                  ('F', 103) ) AS Temp([Name], [Score])
ORDER BY [Score] ASC

-- Database by Doug
-- Douglas Kline
-- 10/12/2017
-- Quick! What's the difference between RANK, DENSE_RANK, and ROW_NUMBER?


No comments:

Post a Comment

Followers