Friday, February 12, 2016

Introduction to Outer Joins

A short video covering Outer Joins in SQL.


Here is the SQL that goes with the demo.

-- Database by Doug
-- 2/12/2016
-- Intro to Outer Joins

use northwind

-- refresher
-- a join can be restrictive
-- it can filter records that don't satisfy the join

SELECT COUNT(productID) AS [Number of Products]
FROM   Products
-- 79 products

SELECT   COUNT(products.productID) AS [Number of Products]
FROM     Products
   JOIN  Categories ON Products.CategoryID=Categories.CategoryID

-- 77 products joined with Categories?


-- the "ON Products.CategoryID=Categories.CategoryID"
-- is known as the "join condition"

-- we missed the products with the NULL CategoryID

SELECT   ProductID,
         ProductName,
         CategoryID
FROM     Products
ORDER BY CategoryID

-- in particular, these products don't satisfy the join

SELECT   ProductID,
         ProductName,
         CategoryID
FROM     Products
WHERE    CategoryID IS NULL

-- so we get this, 77 products:
SELECT   Products.ProductID,
         Products.ProductName,
         Products.CategoryID,
         Categories.CategoryID,
         Categories.CategoryName
FROM     Products
   JOIN  Categories ON Products.CategoryID = Categories.CategoryID
ORDER BY Products.ProductID

-- that's why there are 77 instead of 79

-- so what if we want to see that categoryID?
--  the boss says she wants to see 
-- *every* product
-- *even if* it has a NULL value for the categoryID

-- in other words
-- "show *every* product, and if that product has a category
--  associated with it, show me that too"

-- this is where an outer join comes in

-- 
-- the above queries have been using a shorthand
-- they are really INNER JOINS
-- by default "JOIN" means "INNER JOIN"
-- the following two queries are logically equivalent
-- and ordering by categoryID

SELECT         Products.ProductID,
               Products.ProductName,
               Products.CategoryID,
               Categories.CategoryID,
               Categories.CategoryName
FROM           Products
       JOIN    Categories ON Products.CategoryID = Categories.CategoryID
ORDER BY       Categories.CategoryID
--
SELECT         Products.ProductID,
               Products.ProductName,
               Products.CategoryID,
               Categories.CategoryID,
               Categories.CategoryName
FROM           Products
   INNER JOIN  Categories ON Products.CategoryID = Categories.CategoryID
ORDER BY       Categories.CategoryID

-- and reformatting ...
-- this is the same as above, just reformatted, more specific
-- this is to get the two tables on the same line
-- and one table is on the left of the "INNER JOIN" phrase
-- and one table is on the right of the "INNER JOIN" phrase

SELECT         Products.ProductID,
               Products.ProductName,
               Products.CategoryID,
               Categories.CategoryID,
               Categories.CategoryName
FROM           Products INNER JOIN Categories  
                  ON Products.CategoryID = Categories.CategoryID
ORDER BY       Categories.CategoryID

-- now, a small change to see the one product
-- that doesn't satisfy the join
-- (at the bottom)

SELECT         Products.ProductID,
               Products.ProductName,
               Products.CategoryID,
               Categories.CategoryID,
               Categories.CategoryName
FROM           Products LEFT JOIN Categories   -- the change is in this line
                  ON Products.CategoryID = Categories.CategoryID
ORDER BY       Categories.CategoryID

-- the LEFT JOIN says:
-- "include all records from the table on the left"
-- in this case, Products is on the left, 
-- so every product record is shown, 
-- even if it doesn't satisfy the join condition

-- of course there is a RIGHT JOIN also
-- the next two queries are logically the same

SELECT         Products.ProductID,
               Products.ProductName,
               Products.CategoryID,
               Categories.CategoryID,
               Categories.CategoryName
FROM           Products LEFT JOIN Categories 
                  ON Products.CategoryID = Categories.CategoryID
ORDER BY       Categories.CategoryID
-- 
SELECT         Products.ProductID,
               Products.ProductName,
               Products.CategoryID,
               Categories.CategoryID,
               Categories.CategoryName
FROM           Categories RIGHT JOIN Products 
                  ON Products.CategoryID = Categories.CategoryID
ORDER BY       Products.ProductID

-- these are actually shorthand for
SELECT         Products.ProductID,
               Products.ProductName,
               Products.CategoryID,
               Categories.CategoryID,
               Categories.CategoryName
FROM           Products LEFT OUTER JOIN Categories 
                  ON Products.CategoryID = Categories.CategoryID
ORDER BY       Categories.CategoryID

-- but the OUTER keyword is typically omitted

-- ** notice that the ON statement does not change
--    across INNER, LEFT, LEFT OUTER, RIGHT, RIGHT OUTER joins

-- OUTER joins are also known as "inclusive" joins,
-- because they *include* records that don't satisfy the join condition

-- INNER joins are also know as "exclusive" joins,
-- because they *exclude* records that don't satisfy the join condition


-- so the above query finds the product with a NULL
-- categoryID
-- the product record doesn't satisfy the join

-- what about a category that doesn't satisfy the join?

SELECT COUNT(CategoryID) AS [Number of Categories in Categories table]
FROM   Categories
-- ten categories

SELECT COUNT(DISTINCT CategoryID) AS [Number of Categories in Products table]
FROM   Products
-- only eight appear in the products table
-- but which 8?
-- we can't look for a NULL foreign key...

-- with few records, we can inspect the actual records...

SELECT   CategoryID
FROM     Categories
ORDER BY CategoryID

SELECT   DISTINCT CategoryID
FROM     Products
ORDER BY CategoryID

-- but what if there are thousands of categories
-- and millions of products?

-- an outer join..
-- from the other side...
-- "inclusive" of all Categories
SELECT   Categories.CategoryName,
         Categories.CategoryID,
         Products.CategoryID,
         Products.productID,
         Products.productName
FROM     Products RIGHT JOIN Categories 
            ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CategoryID,
         Products.ProductID

-- so this includes *every* category, 
-- even the ones that don't satisfy the join
-- and also shows the category/product combinations that do satisfy the join

-- a common use of an outer join is 
-- to find records in one table that don't match
--  records from another table

-- this is sometimes called a "find unmatched" query

-- looking at the previous query again...

SELECT   Categories.CategoryName,
         Categories.CategoryID,
         Products.CategoryID,
         Products.productID,
         Products.productName
FROM     Products RIGHT JOIN Categories 
            ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CategoryID,
         Products.ProductID

-- and rephrasing it as a LEFT JOIN...

SELECT   Categories.CategoryName,
         Categories.CategoryID,
         Products.CategoryID,
         Products.productID,
         Products.productName
FROM     Categories LEFT JOIN Products 
            ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CategoryID,
         Products.ProductID

-- note the two categories with no matching products
-- they both have a NULL primary key, which can't be, right?
-- which is a sign that there really is no product record
-- so let's limit to only that 
-- to limit to only the categories with no products

SELECT   Categories.CAtegoryName,
         Products.productID,
         Products.productName
FROM     Categories LEFT JOIN  Products
            ON Products.CategoryID= Categories.CategoryID
WHERE    Products.productID IS NULL
ORDER BY Categories.CAtegoryName,
         Products.productName

-- and formatting for readability
SELECT      Categories.CAtegoryName,
            Products.productID,
            Products.productName
FROM        Categories 
  LEFT JOIN Products ON Products.CategoryID = Categories.CategoryID
WHERE       Products.productID IS NULL
ORDER BY    Categories.CategoryName,
            Products.productName

-- and now, a FULL OUTER JOIN
-- this shows *every* category
-- and *every* product
-- even the ones that don't satisfy the join
-- its the left and the right together

SELECT   Categories.CategoryName,
         Categories.CategoryID,
         Products.CategoryID,
         Products.productID,
         Products.productName
FROM     Products FULL OUTER JOIN Categories 
            ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CAtegoryName,
         Products.productName

-- and formatted consistently with the above examples

SELECT               Categories.CategoryName,
                     Categories.CategoryID,
                     Products.CategoryID,
                     Products.productID,
                     Products.productName
FROM                 Products 
   FULL OUTER JOIN   Categories ON Products.CategoryID= Categories.CategoryID
ORDER BY             Categories.CAtegoryName,
                     Products.productName


-- Database by Doug
-- 2/12/2016
-- Intro to Outer Joins

No comments:

Post a Comment

Followers