Saturday, October 20, 2018

Finding Unmatched Records in SQL

-- Database by Doug
-- Douglas Kline
-- 10/19/2018
--
-- Finding unmatched records
-- LEFT JOIN... WHERE IS NULL

-- suppose you want to find records in one table 
-- that do not match records in another table

-- some practical examples:

-- products that are not in a category
--     (in Products table, but not in Categories)

-- Categories that have no products
--     (in Categories table, but not in Products table)

-- two main cases,
-- first case: look for the NULL

-- products that are not in a category
SELECT   ProductID
FROM     Products
WHERE    CategoryID IS NULL

-- this is fairly simple, because
-- everything we need is in a single table
-- we don't need to inspect another table
-- this is when the Foreign Key is
-- in the table of interest

-- Categories that have no products
-- this is tougher
-- since which Categories to display
-- depends on what's in the Products table

-- here's the pattern, then we'll build up 
-- to why it works

SELECT         Categories.CategoryName
FROM           Categories
   LEFT JOIN   Products ON Categories.CategoryID = Products.CategoryID
WHERE          Products.ProductID IS NULL

-- the pattern is
-- <table of interest> LEFT JOIN <unmatched table>
-- WHERE <unmatched table>.<primary key> IS NULL

-- consider this, normal join
SELECT         Categories.CategoryName,
               Categories.CategoryID,
               Products.CategoryID,
               Products.ProductID,
               Products.ProductName
FROM           Categories
        JOIN   Products ON Categories.CategoryID = Products.CategoryID
ORDER BY       Categories.CategoryName

-- note 77 records
-- now the LEFT JOIN

SELECT         Categories.CategoryName,
               Categories.CategoryID,
               Products.CategoryID,
               Products.ProductID,
               Products.ProductName
FROM           Categories
   LEFT JOIN   Products ON Categories.CategoryID = Products.CategoryID
ORDER BY       Categories.CategoryName

-- now 78 records
-- and note the Office Supplies category with no Product
-- its Product side has been filled in with a *NULL record*

-- the LEFT JOIN includes Categories without matching Products
-- Categories LEFT JOIN Products -- Categories is on the LEFT

-- how do I know that it's really a *NULL record*
-- and not just NULLs in the fields (which might be OK)?
-- because Products.ProductID is NULL, which can't happen for a PK

-- so, let's limit to just that record
SELECT         Categories.CategoryName,
               Categories.CategoryID,
               Products.CategoryID,
               Products.ProductID,
               Products.ProductName
FROM           Categories
   LEFT JOIN   Products ON Categories.CategoryID = Products.CategoryID
WHERE          Products.ProductID IS NULL
ORDER BY       Categories.CategoryName

-- and paring it back to the simple form
SELECT         Categories.CategoryName
FROM           Categories
   LEFT JOIN   Products ON Categories.CategoryID = Products.CategoryID
WHERE          Products.ProductID IS NULL
ORDER BY       Categories.CategoryName
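
-- as a side note, the same question can also be phrased with NOT EXISTS
-- (a sketch against the same tables, not part of the main pattern above)

SELECT   Categories.CategoryName
FROM     Categories
WHERE    NOT EXISTS (  SELECT  1
                       FROM    Products
                       WHERE   Products.CategoryID = Categories.CategoryID)
ORDER BY Categories.CategoryName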


-- Database by Doug
-- Douglas Kline
-- 10/19/2018
--
-- Finding unmatched records
-- LEFT JOIN... WHERE IS NULL



Time zones and the DATETIMEOFFSET data type in SQL

-- Database by Doug
-- Douglas Kline
-- 10/18/2018
-- Time Zones and DateTimeOffset

-- how to use time zones and convert from one to another

-- first, how to think about date/time/datetime in SQL
-- suppose I do this:

SELECT GETDATE() AS [now, somewhere]

-- the data type that is returned is a datetime
-- I can show this:
SELECT SQL_VARIANT_PROPERTY(GETDATE(), 'BaseType')

-- think of the datetime data type as a 
-- multi-part object that holds each 
-- part as a separate value

-- for example:
SELECT   GETDATE(),
         DATEPART(YEAR, GETDATE())        AS [year],
         DATEPART(MONTH, GETDATE())       AS [month],
         DATEPART(DAY, GETDATE())         AS [day],
         DATEPART(HOUR, GETDATE())        AS [hour],
         DATEPART(SECOND, GETDATE())      AS [second],
         DATEPART(MILLISECOND, GETDATE()) AS [millisecond]

-- note that all the parts are whole numbers
-- DATEPART() always returns whole numbers - no decimals

-- and when we manipulate datetimes, we use whole numbers:
SELECT   GETDATE()                     AS [now],
         DATEADD(DAY, 2, GETDATE())    AS [2 days from now],
         DATEADD(DAY, 2.5, GETDATE())  AS [2.5 days from now?],
         DATEADD(HOUR, 12, 
            DATEADD(DAY, 2, GETDATE()))AS [2.5 days from now]

-- look at that last entry again
            
-- notice that 2.5 was truncated to an integer
-- there is no such thing as a fractional part of a datetime

-- regardless of how SQL Server stores the date/time/datetime
-- under the covers
-- it treats each part as a separate integer value

-- so what about time zones?
-- since datetime (and datetime2) doesn't have a time zone part
-- we need a new data type: datetimeoffset

-- you can get the server datetime with time zone like this:
SELECT SYSDATETIMEOFFSET(), 
       SQL_VARIANT_PROPERTY(SYSDATETIMEOFFSET(), 'BaseType')

-- my current database server happens to be
-- in the eastern time zone of the US, which is currently UTC-04:00

-- note that the datetimeoffset data type
-- has everything that datetime does
-- but additionally, it has another part, the time zone
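
-- we can see that extra part with DATEPART and the TZOFFSET datepart
-- (a quick sketch; TZOFFSET reports the offset from UTC in minutes)

SELECT   SYSDATETIMEOFFSET()                      AS [server time with zone],
         DATEPART(TZOFFSET, SYSDATETIMEOFFSET())  AS [offset from UTC, in minutes]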

-- so how would I show the actual east coast time?

-- you might think that the function TODATETIMEOFFSET
-- would do that for you, but all it really does is
-- add the time zone to a datetime

SELECT TODATETIMEOFFSET(GETDATE(),'-04:00') AS [time on east coast?],
       SYSDATETIMEOFFSET()                  AS [time on east coast],
       CAST(GETDATE() AS DATETIMEOFFSET)    AS [time on east coast?] 

-- note that the hours are the same
-- it didn't really move time zones for you
-- it just added the time zone information you gave it

-- to actually adjust the hours, you need to
-- actually add/subtract the hours

SELECT   GETDATE()                        AS [implied time zone],
         TODATETIMEOFFSET
         (
            GETDATE(), 
            '+00:00'
         )                                AS [explicit UTC +00:00 time zone], 
         DATEADD(hour, -4, GETDATE())     AS [east coast time, implied time zone],
         TODATETIMEOFFSET
         (
            DATEADD(hour, -4, GETDATE()),
            '-04:00'
         )                                AS [east coast with time zone]

SELECT GETDATE(), SYSDATETIMEOFFSET()

-- in summary
-- DATETIMEOFFSET is a DATETIME with additional information: the time zone
-- TODATETIMEOFFSET converts from DATETIME to DATETIMEOFFSET
-- but doesn't add or subtract time
-- to take a datetime from one time zone, and show it in another
-- timezone, you have to add/subtract the hours yourself
-- and set the correct time zone in a DATETIMEOFFSET
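
-- as a side note, once you already have a DATETIMEOFFSET value,
-- the built-in SWITCHOFFSET function will do that add/subtract for you
-- (a sketch, showing the same instant expressed as UTC)

SELECT SYSDATETIMEOFFSET()                          AS [east coast with time zone],
       SWITCHOFFSET(SYSDATETIMEOFFSET(), '+00:00')  AS [same instant, as UTC]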

-- Database by Doug
-- Douglas Kline
-- 10/18/2018
-- Time Zones and DateTimeOffset

Using ANY and ALL in SQL

-- Database by Doug
-- Douglas Kline
-- 10/19/2018
-- ANY and ALL

-- comparing a value to a column of values
-- (aka comparing a scalar to a vector of scalars)

-- consider this setup for testing
-- if the WHERE condition is true or false

SELECT 1       AS [isTrue?]
WHERE  10 = 10 -- testing if this is true or false

SELECT 1       AS [isTrue?]
WHERE  10 <> 10 -- testing if this is true or false

-- we get a record if it is true, otherwise no record

-- also, see how I can create a literal table

SELECT   tempField
FROM     (VALUES(11),(12),(7)) tempTable(tempField)

-- note that this creates a single column of values
-- which could be used in something like IN
-- for example
SELECT   1
WHERE    12 IN    (  SELECT   tempField
                     FROM     (VALUES(11),(12),(7)) tempTable(tempField))

-- I could rephrase this as:
SELECT   1
WHERE    12 = ANY (  SELECT   tempField
                     FROM     (VALUES(11),(12),(7)) tempTable(tempField))

-- back to the first example:
SELECT 1          AS [isTrue?]
WHERE  10 > 11    

-- I'm comparing a single scalar value, 10, with another
-- single scalar value, 11

-- now consider if I want to compare the value 10
-- to multiple other values
SELECT 1 
WHERE  10 > 11
  OR   10 > 12
  OR   10 > 7

-- this is three logical expressions
-- combined with OR into the whole logical expression
-- so if ANY of them are true
-- the whole logical expression is true 

-- now let's say that the values 11, 12, 7 are in a column
-- since we're in an RDBMS, they are *likely* to be in a column

-- I can rephrase this with ANY like this:

SELECT 1
WHERE  10 > ANY (SELECT tempfield 
                 FROM (VALUES
                           (11),
                           (12),
                           (7)
                       ) AS tempTable(tempfield))

-- this is exactly equivalent to:

SELECT 1 
WHERE  10 > 11
  OR   10 > 12
  OR   10 > 7

-- back to the first example again...
-- back to the first example:
SELECT 1          AS [isTrue?]
WHERE  10 > 11    

-- let's say I want to do multiple comparisons again
-- but AND them together like this:

SELECT 1 
WHERE  10 > 11    -- logical expression 1
  AND  10 > 12    -- logical expression 2
  AND  10 > 7     -- logical expression 3

-- this is three logical expressions
-- combined with ANDs into the whole WHERE
-- logical expression
-- so all three expressions must be true
-- for the WHERE clause to be true

-- I can make it true with different values:
SELECT 1 
WHERE  10 > 9    -- logical expression 1
  AND  10 > 8    -- logical expression 2
  AND  10 > 7    -- logical expression 3

-- I can rephrase this with ALL
-- with the values in a column

SELECT 1
WHERE  10 > ALL (SELECT tempfield 
                 FROM (VALUES
                           (11),
                           (12),
                           (7)
                       ) AS tempTable(tempfield))
 
-- and get it to be true with a change to the values
SELECT 1
WHERE  10 > ALL (SELECT tempfield 
                 FROM (VALUES
                           (9),
                           (8),
                           (7)
                       ) AS tempTable(tempfield))
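
-- one more detail, as a sketch: if the column has no values at all,
-- ALL is true (nothing can fail the test) and ANY is false (nothing can pass it)

SELECT 1         AS [ALL over an empty column]
WHERE  10 > ALL (SELECT tempfield
                 FROM   (VALUES(11)) AS tempTable(tempfield)
                 WHERE  1 = 0)   -- filters out every row

SELECT 1         AS [ANY over an empty column]
WHERE  10 > ANY (SELECT tempfield
                 FROM   (VALUES(11)) AS tempTable(tempfield)
                 WHERE  1 = 0)   -- filters out every row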

-- so you can think of ANY and ALL 
-- as comparison operator modifiers

-- comparison operators usually take a scalar value on each side

-- scalar  scalar

--  3 < 7
--  3 > 7
--  3 = 7
--  3 != 7
--  3 <> 7
--  3 <= 7
--  3 >= 7

-- ANY and ALL take a scalar on the left, and a column on the right

-- scalar  ANY column
-- scalar  ALL column
-- 3 < ANY ((1),(2),(3))
-- 3 >= ALL ((1),(2),(3))

-- and the column is generally created with a SELECT statement

-- scalar  ANY (SELECT  FROM...)
-- scalar  ALL (SELECT  FROM...)

-- so here's a more concrete example using the Northwind database Orders table

-- suppose we want to know a list of customers who paid more than
-- $200 on freight on an order in 1996?
-- in other words $200 < ANY(orders in 1996)

SELECT   CompanyName
FROM     Customers
WHERE    $200 < ANY (SELECT freight
                     FROM    Orders
                     WHERE   Orders.CustomerID = Customers.CustomerID
                        AND  YEAR(Orderdate) = 1996)
ORDER BY CompanyName

-- notice that this is a correlated subquery
-- it refers to the outer query (Customers.customerID)

-- also notice that this could be rephrased
-- with a JOIN and DISTINCT

SELECT   DISTINCT Customers.CompanyName
FROM     Customers
   JOIN  Orders      ON Customers.CustomerID = Orders.CustomerID
WHERE    Orders.Freight > $200
   AND   YEAR(Orders.OrderDate) = 1996
ORDER BY CompanyName

-- is there a difference? why use one and not the other?
-- it depends

-- depending on your situation, one SQL phrasing might 
-- be clearer than another

-- depending on your data model, volume, statistics, indexes, etc.
-- one might be faster than the other
-- in other words, you might get different query plans

-- in general, I recommend writing your SQL in the clearest
-- manner possible
-- then carefully rephrasing to a better performing, but
-- perhaps less understandable, form if there is a performance issue

-- in summary,
-- ANY and ALL modify comparison operators
-- they succinctly compare a single scalar to
-- a column of scalars

-- thanks for watching!

-- Database by Doug
-- Douglas Kline
-- 10/19/2018
-- ANY and ALL


Monday, October 15, 2018

Using the CAST() function in SQL

-- Database by Doug
-- Douglas Kline
-- 10/10/2018
-- CAST - converting to a new data type

-- sometimes you need to change one data type to another data type

-- consider this:

SELECT '4.0'

-- I might want to deal with this as a number 
-- for example

SELECT '4.0' + 2.0

-- this works
-- even though '4.0' is a varchar
-- and 2.0 is a floating point number

-- the db does an *implicit* conversion
-- of the '4.0' to a floating point
-- then does the addition
-- and returns a floating point

-- we hope the db "knows what I mean"
-- and are *assuming* it ends up doing the right thing

-- for simple things, this mostly works
-- the db is pretty smart

-- however, high quality code doesn't normally
-- make assumptions, so let's be *explicit*

SELECT CAST('4.0' AS float) + 2.0

-- here, I'm using the CAST function to
-- *explicitly* change a varchar to a float
-- I'm not relying on the db "knowing what I mean"

-- you can generally CAST between data types
-- fairly freely

-- see the full matrix of allowable conversions
-- for SQL Server 2017
-- here: https://docs.microsoft.com/en-us/sql/t-sql/data-types/data-type-conversion-database-engine?view=sql-server-2017

-- note that some to/from conversions are implicit / automatic
-- some are not allowed at all 
-- and some require explicit CASTs
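
-- a related note: on SQL Server 2012 and later, TRY_CAST
-- returns NULL instead of raising an error when the conversion fails
-- (a sketch with made-up literals)

SELECT TRY_CAST('4.0'  AS FLOAT) AS [converts fine],
       TRY_CAST('four' AS FLOAT) AS [NULL instead of an error]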

-- here are a few common conversions you might want to do


-- converting numeric and dates to varchars
-- especially when needing to concatenate

SELECT 'Doug' + 1 AS [trying for Doug1] -- error
SELECT 1 + 'Doug' AS [trying for 1Doug] -- error
-- and the fixes
SELECT 'Doug' + CAST(1 AS VARCHAR)
SELECT CAST(1 AS VARCHAR) + 'Doug'

SELECT GETDATE() + 'Doug' -- error
SELECT CAST(GETDATE() AS VARCHAR) + 'Doug'

-- forcing specific types of operations

-- consider this

SELECT 3 / 2

-- notice that I get *integer* division
-- because both operands are integers

-- but what if I want to see 1.5 as the result?
-- I can fix literals easy enough
SELECT 3.0 / 2.0

-- but what about this:
SELECT UnitsInStock / unitsonorder
FROM   Products
WHERE  unitsOnorder <> 0

-- I'm getting integer division

-- here's how to get floating point division:

SELECT CAST(UnitsInStock AS FLOAT) / CAST(unitsonorder AS FLOAT)
FROM   Products
WHERE  unitsOnorder <> 0
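
-- a smaller sketch: casting just one operand is enough,
-- since mixing a float with an integer promotes the division to floating point

SELECT CAST(UnitsInStock AS FLOAT) / unitsonorder
FROM   Products
WHERE  unitsOnorder <> 0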

-- another common conversion is from varchar to date/time

-- see the differences here:
SELECT '4/8/2018'
SELECT CAST('4/8/2018' AS DATE)
SELECT CAST('4/8/2018' AS DATETIME2)

-- and here:
SELECT '20180408 11:00'
SELECT CAST('20180408 11:00' AS DATE)
SELECT CAST('20180408 11:00' AS DATETIME2)
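
-- a note of caution, with a sketch: '4/8/2018' is ambiguous
-- (April 8 or August 4, depending on the session's language/date settings)
-- CONVERT with a style code states the intended format explicitly

SELECT CONVERT(DATE, '4/8/2018', 101) AS [style 101 = mm/dd/yyyy],
       CONVERT(DATE, '4/8/2018', 103) AS [style 103 = dd/mm/yyyy]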

-- summary
-- for 'throw-away' code, the db will implicitly convert
-- for high quality code in a system
-- you should not rely on implicit conversions
-- instead, use CAST

-- Database by Doug
-- Douglas Kline
-- 10/10/2018
-- CAST - converting to a new data type

Wednesday, October 10, 2018

Using DISTINCT in SQL

-- Database by Doug
-- Douglas Kline
-- 10/9/2018
-- DISTINCT - removing duplicates, but in a "dumb" way

-- consider this

SELECT ProductID,
       ProductName,
       SupplierID
FROM   Products

-- this is a list of all products
-- note the repeats in the SupplierID

-- suppose I want to know the list of suppliers
-- in the Products table

SELECT   SupplierID
FROM     Products

-- again, notice the repeats
-- to remove the repeats, I can do this:

SELECT   DISTINCT SupplierID
FROM     Products

-- you might be saying, why not just do this:
SELECT   SupplierID
FROM     Suppliers

-- my response is: that's a different list
-- it's the list of all suppliers in the supplier table

-- What I'm looking for is the list of all suppliers
-- in the products table
-- in other words, all suppliers which we actually *use*

-- Let's look at another example, with a table I've created

SELECT   firstName
FROM     Person
ORDER BY firstName 

-- this is a list of all people's first names
-- as we scroll down, we'll start to see repeats
-- in other words, multiple people have the same first name

-- now let's say we want a list of all distinct firstnames

SELECT   DISTINCT firstName
FROM     Person
ORDER BY firstName 

-- so now there isn't a record returned for every Person record
-- there's a record returned for every unique firstname
-- also note that NULL shows up as one of the distinct firstnames

-- you might say, why not use Group By to do this?
-- like this:

SELECT   firstName
FROM     Person
GROUP BY firstName
ORDER BY firstName

-- logically, it returns the exact same records, and always will
-- however, GROUP BY is designed to do more work:
-- it sets up groups of records in preparation to
-- calculate aggregates like SUM, COUNT, AVG, etc.

-- DISTINCT states the simpler intent
-- if it sees a value it has seen before, it just throws it out
-- in other words, it doesn't prepare groups for aggregation
-- it just makes a list of unique values
-- (in practice, the optimizer often builds the same plan for both)

-- so, don't use GROUP BY when what you really need is DISTINCT

-- alright, what if you want to count stuff?
SELECT   COUNT(ID)                  AS [# of people],
         COUNT(DISTINCT ID)         AS [# of distinct primary key values],
         COUNT(firstName)           AS [# of people with non-NULL firstnames],
         COUNT(DISTINCT firstName)  AS [# of distinct firstNames]
FROM     Person


-- note that the first two values are always the same, 
-- since primary key values are distinct aka unique

-- and also notice that there are 599 distinct first names, but recall

SELECT   DISTINCT firstName
FROM     Person
ORDER BY firstName 

-- so why is the COUNT(DISTINCT firstname) = 599
-- but DISTINCT firstname gives 600 records?

-- remember that COUNT counts non-NULL values

-- finally, DISTINCT is sort of "dumb", in that it doesn't
-- know anything about primary keys
-- or anything about the underlying table(s)
-- it only considers values from the fields you provide

-- consider this:

SELECT   DISTINCT firstname
FROM     Person

-- it doesn't give distinct Person records, just distinct firstnames
-- now this:

SELECT   DISTINCT lastname
FROM     Person

-- likewise, this gives just distinct lastnames, not distinct Person records

-- and when multiple fields are listed, the distinct applies
-- to the *combination* of all the fields in the SELECT clause
-- in this next example,
-- all distinct *combinations* of gender and firstname are shown

SELECT   DISTINCT gender,
         firstname
FROM     Person
ORDER BY gender,
         firstname
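
-- and if you want to *count* those combinations, COUNT(DISTINCT ...) only
-- accepts a single expression, so a derived table does the job
-- (a sketch against the same Person table)

SELECT   COUNT(*) AS [# of distinct gender/firstname combinations]
FROM     (  SELECT   DISTINCT gender,
                     firstname
            FROM     Person ) AS distinctCombinations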

-- in summary,
-- distinct removes duplicates
-- it removes duplicates based on all fields in the SELECT list
-- when used with COUNT, it will not count duplicate values

-- thanks for watching!

-- Database by Doug
-- Douglas Kline
-- 10/9/2018
-- DISTINCT - removing duplicates, but in a "dumb" way



Thursday, October 4, 2018

Numeric Expressions in SQL


-- Database by Doug
-- Douglas Kline
-- 8/28/2018
-- Numeric Expressions - expressions that return a numeric data type

-- beginner level
-- simple numeric expression examples
-- and things to keep in mind

-- I might say 'db'
-- which is shorthand for 'database engine'
-- this is the server software that interprets
--   your SQL and performs actions

-- you can ask your database engine (db) to be your calculator:
SELECT   1 + 2 

-- if you are familiar with spreadsheets
-- and how to type formulas
-- it is very similar

SELECT   1 + 2 

-- an expression is something that the db
-- will read, translate, and perform
-- so in the expression above
-- it reads a 1, then a plus sign, then a 2
-- interprets it as addition
-- performs the addition
-- and returns the results
--------------------------------------------------
SELECT   1 + 2

-- there are certain items in the expression

-- the plus sign (+) is an *operator*
-- more specifically, it is a *binary* operator
-- it defines an operation on two items (thus binary)
-- more specifically, the items on its left and right

-- the plus sign says to the db:
-- take the items on my left and right and add them
-- it is an instruction to the db

SELECT   1 + 2

-- the 1 and 2 are the *operands* of the operator
-- they are the items that the operator applies to

-- the 1 and 2 are also examples of *literals*
-- in other words 1 literally means the number 1
-- in coding, a literal is not code, but an actual value

-- here is an example of a unary operator (one operand)
SELECT  -5

-- here, the literal value 5
-- is modified to become negative
-- by the minus sign operator

------------------------------------------------------------
-- how does the db know what *is* an expression, and what *isn't*
-- in this example
SELECT   1 + 2

-- the db is *expecting* an expression to be in this position
-- (after the word SELECT)
-- so expressions are mainly known by their
-- location/position in your SELECT statement

---------------------------------------------------------
-- notice that usual order of operations is followed
-- * and / come before + and -
-- otherwise it is left-to-right,
-- unless you add parentheses
-- then sub-expressions in parentheses are evaluated first
SELECT 1 + 2 * 6
SELECT (1 + 2) * 6
SELECT (3 - 1) / 2

------------------------------------------------------
-- notice that operators sometimes change meaning
-- depending on their context

SELECT 1 / 3      -- does integer division
SELECT 1.0 / 3.0  -- floating point division
SELECT 1 / 3.0    -- floating point division
SELECT 1.0 / 3    -- floating point division

-- if both operands (items on either side of the operator)
-- are whole numbers, do integer division
-- otherwise do floating point division

-----------------------------------------------------------------------------
-- here's an operator that might be new to you: %
SELECT 5 % 3 -- modulus operator - gives the remainder after integer division
SELECT 8 % 3 -- same answer

SELECT (8 / 3) * 3 + 8 % 3 -- equals 8

-----------------------------------------------------------------------------
-- what if your data is not a literal?
-- in other words, it comes out of the db

SELECT   unitsInstock -- this is an expression, just very simple
FROM     Products

SELECT   unitsInStock + unitsOnOrder -- effective inventory
FROM     Products

-- plus is still the operator
-- the operands are UnitsInStock and unitsOnOrder
-- the expression is evaluated on each record, separately

-----------------------------------------------------
-- but what if the data type is not right for
--   the operations I want to do?
-- specifically, the data is whole numbers
-- but I want to do floating point division
SELECT   unitsInStock,
         reOrderLevel,
         unitsinstock / reOrderLevel
FROM     Products
WHERE    reOrderLevel <> 0  -- to avoid division by zero

-- but I'd really like to see the results as a floating point/decimal

-- you have to convert *prior* to dividing

SELECT   unitsInStock,
         reOrderLevel,
         CONVERT(real,unitsinstock) / reOrderLevel
FROM     Products
WHERE    reOrderLevel <> 0  -- to avoid division by zero

-- the unitsInStock is converted to a real (aka floating point number)
-- before it gets divided
-- since one of the operands
-- is a floating point number, it does floating point division

-------------------------------------------------------------
-- perhaps a more realistic and useful expression
SELECT      ProductID,
            ProductName,
            unitsInStock * unitprice AS [Dollar Value in Inventory]
FROM        Products
ORDER BY    ProductID

-- note that unitprice is a money data type,
-- and the unitsInstock is a whole number
-- so the result is shown as a money data type
-- (note the 2 digits to the right of the decimal)

-- we can show this explicitly:
SELECT      SQL_VARIANT_PROPERTY(unitPrice, 'BaseType'),
            SQL_VARIANT_PROPERTY(unitsInStock, 'BaseType'),
            SQL_VARIANT_PROPERTY(unitsInStock * unitPrice, 'BaseType')
FROM        Products

-----------------------------------------------------------------------
-- in summary
-- expressions are instructions for the db to create new values
-- expressions are known by their location in the statement
-- expressions have operators and operands

-- Database by Doug
-- Douglas Kline
-- 8/28/2018
-- Numeric Expressions

String Expressions in SQL

-- Database by Doug
-- Douglas Kline
-- 8/28/2018
-- Character Expressions - expressions that return character data

-- beginner level
-- might want to check out numeric expressions
------------------------------------------------------------------
-- consider this statement

SELECT   'Fred',
         'Flintstone'

-- note that the single quotes are not shown in
-- the table results
-- this is because they are not *part of* the data
-- they are used to mark the beginning and end of the data
-- and are called *delimiters*

-- so the statement has two pieces of data
-- delimited by single quotes

SELECT   'Fred',
         'Flintstone'

-- the *type* of data is *character* data
-- more specifically, a *sequence of characters*

-- in programming, they would be called *strings*

-- another term might be *string literals*

-- now look at this:
SELECT      123   AS [numeric data type],
            '123' AS [character data type]

-- the first column is a number
-- the second column is a sequence of characters
--    first the character '1', then the character '2', ...

-- now consider this

SELECT       1   + 5  AS [numeric],
            '1' + '5' AS [character]
             
-- see how the plus sign means something different
-- depending on what's on either side?

-- with character data, the plus sign means *concatenate*
-- or 'put together'

-- so the basic operator for character expressions
-- is the plus sign

-- we can prove this:
SELECT      SQL_VARIANT_PROPERTY(1 + 5, 'BaseType'),
            SQL_VARIANT_PROPERTY('1' + '5', 'BaseType')

-- here's another example:
SELECT   'Fred' + ' ' + 'Flintstone' AS [Full Name]

-------------------------------------------------------------
-- there's lots more we want to do with
-- character data
-- but we need functions....
-- here are a few simple ones:

SELECT LEN  (' Fred'),     -- find how many chars - note the spaces
       UPPER(' Fred'),     -- convert all chars to upper case
       LOWER(' Fred'),     -- convert all chars to lower case
       LEFT (' Fred', 2),  -- return just the left-most 2 chars
       RIGHT(' Fred', 2),  -- return just the right-most 2 chars
       LTRIM(' Fred')      -- get rid of any space chars on left

-- you might also want to find the location of a certain character

SELECT   CHARINDEX(' ', 'Fred Flintstone') AS [location of space character] 

-- and also, you can combine these functions into a more complex expression

SELECT   'Fred Flintstone'       AS [original full name],
         RIGHT('Fred Flintstone',                    -- right-most 10 characters
                 LEN('Fred Flintstone')              -- the number 15
                 - CHARINDEX(' ', 'Fred Flintstone') -- the number 5
              )                  AS [last name only]

---------------------------------------------------------------------------------
-- what about data from a table - not literals

SELECT   ProductName,
         LEN(ProductName) AS [length of name],
         UPPER(ProductName) AS [uppercase],
         CHARINDEX(' ', ProductName),
         RIGHT(ProductName, LEN(ProductName) - CHARINDEX(' ', ProductName))
FROM     Products
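
-- one more sketch: to get just the *first* word, combine LEFT and CHARINDEX
-- (appending a space so names without any space still behave)

SELECT   ProductName,
         LEFT(ProductName, CHARINDEX(' ', ProductName + ' ') - 1) AS [first word only]
FROM     Products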

---------------------------------------------------------------------------------
-- in summary
-- string literals are delimited by single quotes
-- plus sign means concatenate
-- functions help with other tasks

-- Database by Doug
-- Douglas Kline
-- 8/28/2018
-- Character Expressions


Primary Keys in Relational Databases

-- Database by Doug
-- Douglas Kline
-- 9/28/2018
-- Primary Keys

USE Northwind
-- note that I'm starting out with a 
-- modified version of the Northwind.Categories table
-- called DBBDCategories
-- for demo purposes

-- Primary Keys
-- a primary key has two purposes: 
---- to guarantee access to every record in a table
---- to uniquely distinguish a record from all other records in the table

-- this is done by requiring a primary key to be:
---- NOT NULL
---- UNIQUE across all records in that table

-- the NOT NULL makes sure that *at least one* column (the PK) is never NULL
-- and that there is a WHERE clause that will match on every record
-- for example:
--  WHERE categoryID = 7

-- guarantees that:
-- every record whose categoryID is 7 will be matched
-- there is no record with an *unknown* ID that *might be* 7

-- the UNIQUE makes sure that you can make a request of records
-- that guarantees at most one record
-- for example:
--  WHERE categoryID = 7

-- guarantees one of:
-- a *single* record: the category with a categoryID of 7
-- or no record: there is definitively no category with a categoryID of 7

--***************************************************
-- tables don't absolutely require a primary key
-- however, guaranteed, unique access is extremely helpful in many situations
-- (particularly Online Transaction Processing Systems /OLTPs)
-- AND, there can be significant performance benefits to having a PK 
-- (for certain operations)

-- Consider my somewhat modified DBBDCategories table from Northwind
SELECT *
FROM    DBBDCategories

-- I happen to know that CategoryID is the primary key of this table

-- but how would you know otherwise?

-- you could look in the object inspector on the left
-- expand the database, the tables, the Categories table, the Columns
-- note the key icon on the left of the CategoryID, and the 'PK' in parentheses
-- (demo)

-- another way would be to put the table into a diagram
-- note the yellow key icon next to the CategoryID
-- (demo)

-- Finally, a really definitive way would be to script out the table
-- and see the full definition of the table in SQL
-- (demo)

-- here is a slightly abbreviated/edited version of 
-- the SQL CREATE statement for the DBBDCategories table:

--CREATE TABLE [dbo].[DBBDCategories]
--(
-- [CategoryID] [int] NOT NULL,
-- [CategoryName] [nvarchar](15) NOT NULL,
-- [Description] [ntext] NULL,
-- [Picture] [image] NULL,
-- CONSTRAINT [PK_DBBDCategories] PRIMARY KEY CLUSTERED 
--   (
--    [CategoryID] ASC
--   )
--) 

-- note the NOT NULL for CategoryID 
-- this means that every insert and update will be checked to make sure 
-- that the CategoryID does not become NULL
-- let's try it
-- demo insert new NULL PK
-- demo update PK to a NULL
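
-- roughly, those demo statements look like this (a sketch, with made-up values):
-- INSERT INTO DBBDCategories (CategoryID, CategoryName)
-- VALUES                     (NULL, 'Test')       -- fails: CategoryID cannot be NULL
-- UPDATE DBBDCategories
-- SET    CategoryID = NULL
-- WHERE  CategoryID = 1                           -- fails for the same reason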

-- note the PRIMARY KEY specified as a CONSTRAINT
-- this means that every insert and update will be checked to make sure
-- that no two CategoryIDs, in the WHOLE TABLE, get duplicated
-- let's try it
-- demo insert duplicate PK
-- demo update PK to a duplicate value
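
-- again, roughly (a sketch, with made-up values):
-- INSERT INTO DBBDCategories (CategoryID, CategoryName)
-- VALUES                     (1, 'Duplicate of 1')  -- fails: duplicate primary key value
-- UPDATE DBBDCategories
-- SET    CategoryID = 1
-- WHERE  CategoryID = 2                             -- fails: would create a duplicate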


-- let's review
-- we are guaranteed that no record in the categories table has an unknown CategoryID
-- we are guaranteed that no two records in the categories table have the same CategoryID

-- in other words
-- WHERE CategoryID = 8
-- guarantees to return either no record, or one record
-- and we are guaranteed that there is no record
-- with an *unknown* CategoryID which is actually 8

-- so what does a categoryID of 8 actually mean?
--....?
--....?





-- NOTHING!!

-- primary keys don't *need* to mean anything
-- they just have to be UNIQUE and NOT NULL

-- now let's look at the definition of 
-- the *actual* Categories table from the Northwind DB

--CREATE TABLE [dbo].[Categories]
--(
-- [CategoryID] [int] IDENTITY(1,1) NOT NULL,  -- ** note IDENTITY(1,1)
-- [CategoryName] [nvarchar](15) NOT NULL,
-- [Description] [ntext] NULL,
-- [Picture] [image] NULL,
-- CONSTRAINT [PK_Categories] PRIMARY KEY CLUSTERED -- ** note PRIMARY KEY
-- (
--  [CategoryID] ASC
-- )
--) ON [PRIMARY] 

-- note the PRIMARY KEY constraint, just as before
-- PRIMARY KEY means UNIQUE and NOT NULL, taken together

-- however, also note the IDENTITY(1,1)

-- this is an example of defining a *synthetic* primary key
-- we are relinquishing to the DB engine
-- the task of providing PK values for this column

-- specifically, the first PK value should be 1
-- the second PK value should be 1 + 1
-- the third PK value should be  2 + 1
-- the fourth PK value should be 3 + 1
-- in other words, begin with 1, and increment by 1 each time

-- this also means that NO ONE ELSE, besides the DB engine
-- can change the value of Categories.CategoryID

-- (demo) try to change a PK value
-- (demo) try to insert a PK value
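
-- roughly, those demos look like this (a sketch):
-- UPDATE Categories
-- SET    CategoryID = 99
-- WHERE  CategoryID = 1                 -- fails: an IDENTITY column cannot be updated
-- INSERT INTO Categories (CategoryID, CategoryName)
-- VALUES                 (99, 'Test')   -- fails unless IDENTITY_INSERT is turned on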

-- also, IDENTITY values are never re-used
-- this guarantees uniqueness across the live table and all archives
-- for example, if we archived CategoryID = 7 (moved it to another place)
-- and then re-used CategoryID = 7 for another record,
-- those two records could be confused for each other

 -- (demo) create a new record, delete it, show next PK value, skipped

 -- why is this good?
 -- HUMANS might accidentally re-use archived PK values
 -- also, columns that *mean* something may need to be changed
 -- it does exactly what is needed, and no more
 -- internal to the db engine, this can significantly improve performance

-- in summary,
-- primary keys need to be UNIQUE and NOT NULL, and nothing more
-- this guarantees unique access to each record
-- *synthetic* keys can be helpful in keeping all key values UNIQUE

-- this is especially helpful in OLTP type systems

-- foreshadowing ...
-- primary keys also are referenced by FOREIGN KEYS
-- see the video on FOREIGN KEYS!

-- thanks for watching!

-- Database by Doug
-- Douglas Kline
-- 9/28/2018
-- Primary Keys

Foreign Keys in Relational Databases


-- Database by Doug
-- Douglas Kline
-- 10/1/2018
-- Foreign Keys

USE Northwind
-- Foreign Keys
-- a field in one table (table A)
-- that references a primary key value in another table (table B)

-- for example, the Categories and Products table from Northwind
-- (show diagram)

-- in this case, the foreign key is
-- Products.CategoryID, i.e., the CategoryID field in Products

-- from a practical usage standpoint
-- the foreign key is a "lookup"
-- consider this:

SELECT   ProductID,
         ProductName,
         CategoryID
FROM     Products

SELECT   CategoryID,
         CategoryName
FROM     Categories

-- note how I can use the Products.CategoryID
-- value to find the CategoryName of the right Category

-- note that Primary Keys are UNIQUE and NOT NULL
-- e.g., Products.ProductID, Categories.CategoryID
-- but Foreign Keys can be NOT UNIQUE and NULL
-- e.g., Products.CategoryID

-- in this situation, many products can be
-- in the same category
-- and a product could have an unknown category
-- (products can exist without a category)

-- in a sense, FKs define a "relationship"
-- between two records
-- in this case, each product record is related
-- to a category record
-- (or not, in the case of NULL FK)

-- to be able to "lookup" from one table to another
-- you just need two fields of similar data types

-- however, what if you want to make sure 
-- that the lookup values are always *good*
-- in other words, that values in
-- Products.CategoryID are actually over in
-- the Categories table?

-- this requirement, that FK values are valid
-- is called *referential integrity*

-- note that this is a constraint on
-- the Products.CategoryID field
-- it must be a valid value that
-- exists over in the Categories.CategoryID field

-- let's see it in the definition of Products

CREATE TABLE [dbo].[Products]
(
   [ProductID]    [int] IDENTITY(1,1)  NOT NULL,
   [ProductName]  [nvarchar](40)       NOT NULL,
   [CategoryID]   [int]                NULL,
   -- other fields omitted for clarity
   CONSTRAINT [PK_Products]
     PRIMARY KEY CLUSTERED
     (
      [ProductID] ASC
     ),
   CONSTRAINT [FK_Products_Categories]
     FOREIGN KEY
     (
      [CategoryID]
     )
     REFERENCES [dbo].[Categories] ([CategoryID])
)

-- note the REFERENCES section
-- note the allowed NULL for CategoryID

-- this means that on any UPDATE or INSERT to Products
-- a Product record must always have a valid CategoryID
-- or a NULL value (unknown)

-- (demo this)
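
-- roughly, that demo looks like this (a sketch, with made-up values):
-- INSERT INTO Products (ProductName, CategoryID)
-- VALUES               ('Test Product', 999)   -- fails: no Category has CategoryID 999
-- INSERT INTO Products (ProductName, CategoryID)
-- VALUES               ('Test Product', NULL)  -- allowed: the category is simply unknown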

-- it also means that on any UPDATE or DELETE to Categories
-- all Product records must always have a valid CategoryID
-- or a NULL value (unknown)

-- (demo this)
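
-- and roughly (a sketch):
-- DELETE FROM Categories
-- WHERE  CategoryID = 1    -- fails if any Product record still references CategoryID 1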

-- If we have referential integrity for all Product records
-- then we can be confident that our JOINs between the two tables
-- provide valid results

SELECT   Products.ProductID,
         Products.ProductName,
         Products.CategoryID,
         Categories.CategoryID,
         Categories.CategoryName
FROM     Products 
   JOIN  Categories ON Products.CategoryID = Categories.CategoryID
ORDER BY Products.ProductID

-- JOINs are a way of doing multiple "lookups"
-- of FK values to PK values
-- but that's another video...

-- in summary,
-- foreign keys provide a "lookup" mechanism
-- for records in two different tables
-- this also defines how the two tables are "related"
-- *referential integrity* enforces "good" foreign keys
-- in other words, a FK should always have a valid value
-- FK values can repeat - be non-UNIQUE
-- FK values can be NULL - unknown

-- thanks for watching!

-- Database by Doug
-- Douglas Kline
-- 10/1/2018
-- Foreign Keys
