Here is the SQL that goes with the demo.
-- Database by Doug
-- 2/12/2016
-- Intro to Outer Joins
use northwind
-- refresher
-- a join can be restrictive
-- it can filter records that don't satisfy the join
SELECT COUNT(productID) AS [Number of Products]
FROM Products
-- 79 products
SELECT COUNT(products.productID) AS [Number of Products]
FROM Products
JOIN Categories ON Products.CategoryID=Categories.CategoryID
-- 77 products joined with Categories?
-- the "ON Products.CategoryID=Categories.CategoryID"
-- is known as the "join condition"
-- we missed the products with the NULL CategoryID
SELECT ProductID,
ProductName,
CategoryID
FROM Products
ORDER BY CategoryID
-- in particular, these products don't satisfy the join
SELECT ProductID,
ProductName,
CategoryID
FROM Products
WHERE CategoryID IS NULL
-- so we get this, 77 products:
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Products
JOIN Categories ON Products.CategoryID = Categories.CategoryID
ORDER BY Products.ProductID
-- that's why there are 77 instead of 79
-- so what if we want to see that categoryID?
-- the boss says she wants to see
-- *every* product
-- *even if* it has a NULL value for the categoryID
-- in other words
-- "show *every* product, and if that product has a category
-- associated with it, show me that too"
-- this is where an outer join comes in
--
-- the above queries have been using a shorthand
-- they are really INNER JOINS
-- by default "JOIN" means "INNER JOIN"
-- the following two queries are logically equivalent
-- and ordering by categoryID
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Products
JOIN Categories ON Products.CategoryID = Categories.CategoryID
ORDER BY Categories.CategoryID
--
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Products
INNER JOIN Categories ON Products.CategoryID = Categories.CategoryID
ORDER BY Categories.CategoryID
-- and reformatting ...
-- this is the same as above, just reformatted, more specific
-- this is to get the two tables on the same line
-- and one table is on the left of the "INNER JOIN" phrase
-- and one table is on the right of the "INNER JOIN" phrase
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Products INNER JOIN Categories
ON Products.CategoryID = Categories.CategoryID
ORDER BY Categories.CategoryID
-- now, a small change to see the one product
-- that doesn't satisfy the join
-- (at the bottom)
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Products LEFT JOIN Categories -- the change is in this line
ON Products.CategoryID = Categories.CategoryID
ORDER BY Categories.CategoryID
-- the LEFT JOIN says:
-- "include all records from the table on the left"
-- in this case, Products is on the left,
-- so every product record is shown,
-- even if it doesn't satisfy the join condition
-- of course there is a RIGHT JOIN also
-- the next two queries are logically the same
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Products LEFT JOIN Categories
ON Products.CategoryID = Categories.CategoryID
ORDER BY Categories.CategoryID
--
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Categories RIGHT JOIN Products
ON Products.CategoryID = Categories.CategoryID
ORDER BY Products.ProductID
-- these are actually shorthand for
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Products LEFT OUTER JOIN Categories
ON Products.CategoryID = Categories.CategoryID
ORDER BY Categories.CategoryID
-- but the OUTER keyword is typically omitted
-- ** notice that the ON statement does not change
-- across INNER, LEFT, LEFT OUTER, RIGHT, RIGHT OUTER joins
-- OUTER joins are also known as "inclusive" joins,
-- because they *include* records that don't satisfy the join condition
-- INNER joins are also know as "exclusive" joins,
-- because they *exclude* records that don't satisfy the join condition
-- so the above query finds the product with a NULL
-- categoryID
-- the product record doesn't satisfy the join
-- what about a category that doesn't satisfy the join?
SELECT COUNT(CategoryID) AS [Number of Categories in Categories table]
FROM Categories
-- ten categories
SELECT COUNT(DISTINCT CategoryID) AS [Number of Categories in Products table]
FROM Products
-- only eight appear in the products table
-- but which 8?
-- we can't look for a NULL foreign key...
-- with few records, we can inspect the actual records...
SELECT CategoryID
FROM Categories
ORDER BY CategoryID
SELECT DISTINCT CategoryID
FROM Products
ORDER BY CategoryID
-- but what if there are thousands of categories
-- and millions of products?
-- an outer join..
-- from the other side...
-- "inclusive" of all Categories
SELECT Categories.CategoryName,
Categories.CategoryID,
Products.CategoryID,
Products.productID,
Products.productName
FROM Products RIGHT JOIN Categories
ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CategoryID,
Products.ProductID
-- so this includes *every* category,
-- even the ones that don't satisfy the join
-- and also shows the category/product combinations that do satisfy the join
-- a common use of an outer join is
-- to find records in one table that don't match
-- records from another table
-- this is sometimes called a "find unmatched" query
-- looking at the previous query again...
SELECT Categories.CategoryName,
Categories.CategoryID,
Products.CategoryID,
Products.productID,
Products.productName
FROM Products RIGHT JOIN Categories
ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CategoryID,
Products.ProductID
-- and rephrasing it as a LEFT JOIN...
SELECT Categories.CategoryName,
Categories.CategoryID,
Products.CategoryID,
Products.productID,
Products.productName
FROM Categories LEFT JOIN Products
ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CategoryID,
Products.ProductID
-- note the two categories with no matching products
-- they both have a NULL primary key, which can't be, right?
-- which is a sign that there really is no product record
-- so let's limit to only that
-- to limit to only the categories with no products
SELECT Categories.CAtegoryName,
Products.productID,
Products.productName
FROM Categories LEFT JOIN Products
ON Products.CategoryID= Categories.CategoryID
WHERE Products.productID IS NULL
ORDER BY Categories.CAtegoryName,
Products.productName
-- and formatting for readability
SELECT Categories.CAtegoryName,
Products.productID,
Products.productName
FROM Categories
LEFT JOIN Products ON Products.CategoryID = Categories.CategoryID
WHERE Products.productID IS NULL
ORDER BY Categories.CategoryName,
Products.productName
-- and now, a FULL OUTER JOIN
-- this shows *every* category
-- and *every* product
-- even the ones that don't satisfy the join
-- its the left and the right together
SELECT Categories.CategoryName,
Categories.CategoryID,
Products.CategoryID,
Products.productID,
Products.productName
FROM Products FULL OUTER JOIN Categories
ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CAtegoryName,
Products.productName
-- and formatted consistently with the above examples
SELECT Categories.CategoryName,
Categories.CategoryID,
Products.CategoryID,
Products.productID,
Products.productName
FROM Products
FULL OUTER JOIN Categories ON Products.CategoryID= Categories.CategoryID
ORDER BY Categories.CAtegoryName,
Products.productName
-- Database by Doug
-- 2/12/2016
-- Intro to Outer Joins
No comments:
Post a Comment