-- Database by Doug
-- Douglas Kline
-- 10/19/2018
--
-- Finding unmatched records
-- LEFT JOIN... WHERE IS NULL
-- suppose you want to find records in one table
-- that do not match records in another table
-- some practical examples:
-- products that are not in a category
-- (in Products table, but not in Categories)
-- Categories that have no products
-- (in Categories table, but not in Products table)
-- two main cases,
-- first case: look for the NULL
-- products that are not in a category
SELECT ProductID
FROM Products
WHERE CategoryID IS NULL
-- this is fairly simple, because
-- everything we need is in a single table
-- we don't need to inspect another table
-- this is when the Foreign Key is
-- in the table of interest
-- Categories that have no products
-- this is tougher
-- since which Categories to display
-- depends on what's in the Products table
-- here's the pattern, then we'll build up
-- to why it works
SELECT Categories.CategoryName
FROM Categories
LEFT JOIN Products ON Categories.CategoryID = Products.CategoryID
WHERE Products.ProductID IS NULL
-- the pattern is
-- <table of interest> LEFT JOIN <unmatched table>
-- WHERE <unmatched table>.<primary key> IS NULL
-- consider this, normal join
SELECT Categories.CategoryName,
Categories.CategoryID,
Products.CategoryID,
Products.ProductID,
Products.ProductName
FROM Categories
JOIN Products ON Categories.CategoryID = Products.CategoryID
ORDER BY Categories.CategoryName
-- note 77 records
-- now the LEFT JOIN
SELECT Categories.CategoryName,
Categories.CategoryID,
Products.CategoryID,
Products.ProductID,
Products.ProductName
FROM Categories
LEFT JOIN Products ON Categories.CategoryID = Products.CategoryID
ORDER BY Categories.CategoryName
-- now 78 records
-- and note the Office Supplies category with no Product
-- the missing Product record shows up as a *NULL record*
-- the LEFT JOIN includes Categories without matching Products
-- Categories LEFT JOIN Products -- Categories is on the LEFT
-- how do I know that it's really a *NULL record*
-- and not just a real Product record with NULL fields? (which could be legitimate)
-- because Products.ProductID is NULL, which can't happen for a PK
-- so, let's limit to just that record
SELECT Categories.CategoryName,
Categories.CategoryID,
Products.CategoryID,
Products.ProductID,
Products.ProductName
FROM Categories
LEFT JOIN Products ON Categories.CategoryID = Products.CategoryID
WHERE Products.ProductID IS NULL
ORDER BY Categories.CategoryName
-- and paring it back to the simple form
SELECT Categories.CategoryName
FROM Categories
LEFT JOIN Products ON Categories.CategoryID = Products.CategoryID
WHERE Products.ProductID IS NULL
ORDER BY Categories.CategoryName
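-- as a footnote (an equivalent sketch, not part of the original pattern above):
-- the same unmatched-records question can also be phrased with NOT EXISTS
-- and a correlated subquery against the same Northwind tables;
-- it should return the same Categories as the LEFT JOIN version
SELECT Categories.CategoryName
FROM Categories
WHERE NOT EXISTS (SELECT 1
                  FROM Products
                  WHERE Products.CategoryID = Categories.CategoryID)
ORDER BY Categories.CategoryName
-- depending on indexes and statistics, the optimizer may produce
-- similar plans for both phrasings; use whichever reads more clearly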
-- Database by Doug
-- Douglas Kline
-- 10/19/2018
--
-- Finding unmatched records
-- LEFT JOIN... WHERE IS NULL
Saturday, October 20, 2018
Finding Unmatched Records in SQL
Time zones and the DATETIMEOFFSET data type in SQL
-- Database by Doug
-- Douglas Kline
-- 10/18/2018
-- Time Zones and DateTimeOffset
-- how to use time zones and convert from one to another
-- first, how to think about date/time/datetime in SQL
-- suppose I do this:
SELECT GETDATE() AS [now, somewhere]
-- the data type that is returned is a datetime
-- I can show this:
SELECT SQL_VARIANT_PROPERTY(GETDATE(), 'BaseType')
-- think of the datetime data type as a
-- multi-part object that holds each
-- part as a separate value
-- for example:
SELECT GETDATE(),
DATEPART(YEAR, GETDATE()) AS [year],
DATEPART(MONTH, GETDATE()) AS [month],
DATEPART(DAY, GETDATE()) AS [day],
DATEPART(HOUR, GETDATE()) AS [hour],
DATEPART(SECOND, GETDATE()) AS [second],
DATEPART(MILLISECOND, GETDATE()) AS [millisecond]
-- note that all the parts are whole numbers
-- DATEPART() always returns whole numbers - no decimals
-- and when we manipulate datetimes, we use whole numbers:
SELECT GETDATE() AS [now],
DATEADD(DAY, 2, GETDATE()) AS [2 days from now],
DATEADD(DAY, 2.5, GETDATE()) AS [2.5 days from now?],
DATEADD(HOUR, 12,
DATEADD(DAY, 2, GETDATE())) AS [2.5 days from now]
-- look at that last entry again
-- notice that 2.5 was truncated to an integer
-- there is no such thing as a fractional part of a datetime
-- regardless of how SQL Server stores the date/time/datetime
-- under the covers
-- it treats each part as a separate integer value
-- so what about time zones?
-- since datetime (and datetime2) doesn't have a time zone part
-- we need a new data type: datetimeoffset
-- you can get the server datetime with time zone like this:
SELECT SYSDATETIMEOFFSET(),
SQL_VARIANT_PROPERTY(SYSDATETIMEOFFSET(), 'BaseType')
-- my current database server happens to be
-- in the eastern time zone of the US, which is UTC-04:00
-- note that the datetimeoffset data type
-- has everything that datetime does
-- but additionally, it has another part, the time zone
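-- a quick check (added sketch, not in the original script):
-- DATEPART can pull out that extra part too -
-- TZOFFSET returns the time zone offset in minutes
SELECT SYSDATETIMEOFFSET() AS [server time with offset],
       DATEPART(TZOFFSET, SYSDATETIMEOFFSET()) AS [offset in minutes] -- e.g. -240 for -04:00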
-- so how would I show the actual east coast time?
-- you might think that the function TODATETIMEOFFSET
-- would do that for you, but all it really does is
-- add the time zone to a datetime
SELECT TODATETIMEOFFSET(GETDATE(),'-04:00') AS [time on east coast?],
SYSDATETIMEOFFSET() AS [time on east coast],
CAST(GETDATE() AS DATETIMEOFFSET) AS [time on east coast?]
-- note that the hours are the same
-- it didn't really move time zones for you
-- it just added the time zone information you gave it
-- to actually adjust the hours, you need to
-- actually add/subtract the hours
SELECT GETDATE() AS [implied time zone],
TODATETIMEOFFSET
(
GETDATE(),
'-00:00'
) AS [explicit UTC +00:00 time zone],
DATEADD(hour, -4, GETDATE()) AS [east coast time, implied time zone],
TODATETIMEOFFSET
(
DATEADD(hour, -4, GETDATE()),
'-04:00'
) AS [east coast with time zone]
SELECT GETDATE(), SYSDATETIMEOFFSET()
-- in summary
-- DATETIMEOFFSET is a DATETIME with additional information: the time zone
-- TODATETIMEOFFSET converts from DATETIME to DATETIMEOFFSET
-- but doesn't add or subtract time
-- to take a datetime from one time zone, and show it in another
-- timezone, you have to add/subtract the hours yourself
-- and set the correct time zone in a DATETIMEOFFSET
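-- here is a sketch of that manual pattern (added for clarity, not in the original script),
-- converting the east coast server time to US Pacific time;
-- the 3-hour difference and the -07:00 offset are assumptions for this example
SELECT SYSDATETIMEOFFSET() AS [east coast, -04:00],
       TODATETIMEOFFSET
       (
           DATEADD(HOUR, -3, GETDATE()), -- shift the clock time by the hour difference
           '-07:00'                      -- then label it with the target time zone
       ) AS [pacific, -07:00]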
-- Database by Doug
-- Douglas Kline
-- 10/18/2018
-- Time Zones and DateTimeOffset
Using ANY and ALL in SQL
-- Database by Doug
-- Douglas Kline
-- 10/19/2018
-- ANY and ALL
-- comparing a value to a column of values
-- (aka comparing a scalar to a vector of scalars)
-- consider this setup for testing
-- if the WHERE condition is true or false
SELECT 1 AS [isTrue?]
WHERE 10 = 10 -- testing if this is true or false
SELECT 1 AS [isTrue?]
WHERE 10 <> 10 -- testing if this is true or false
-- we get a record if it is true, otherwise no record
-- also, see how I can create a literal table
SELECT tempField
FROM (VALUES(11),(12),(7)) tempTable(tempField)
-- note that this creates a single column of values
-- which could be used in something like IN
-- for example
SELECT 1
WHERE 12 IN ( SELECT tempField
FROM (VALUES(11),(12),(7)) tempTable(tempField))
-- I could rephrase this as:
SELECT 1
WHERE 12 = ANY ( SELECT tempField
FROM (VALUES(11),(12),(7)) tempTable(tempField))
-- back to the first example:
SELECT 1 AS [isTrue?]
WHERE 10 > 11
-- I'm comparing a single scalar value, 10, with another
-- single scalar value, 11
-- now consider if I want to compare the value 10
-- to multiple other values
SELECT 1
WHERE 10 > 11
OR 10 > 12
OR 10 > 7
-- this is three logical expressions
-- combined with OR into the whole logical expression
-- so if ANY of them are true
-- the whole logical expression is true
-- now let's say that the values 11, 12, 7 are in a column
-- since we're in an RDBMS, they are *likely* to be in a column
-- I can rephrase this with ANY like this:
SELECT 1
WHERE 10 > ANY (SELECT tempfield
FROM (VALUES
(11),
(12),
(7)
) AS tempTable(tempfield))
-- this is exactly equivalent to:
SELECT 1
WHERE 10 > 11
OR 10 > 12
OR 10 > 7
-- back to the first example again...
-- back to the first example:
SELECT 1 AS [isTrue?]
WHERE 10 > 11
-- let's say I want to do multiple comparisons again
-- but AND them together like this:
SELECT 1
WHERE 10 > 11 -- logical expression 1
AND 10 > 12 -- logical expression 2
AND 10 > 7 -- logical expression 3
-- this is three logical expressions
-- combined with ANDs into the whole WHERE
-- logical expression
-- so all three expressions must be true
-- for the WHERE clause to be true
-- I can make it true with different values:
SELECT 1
WHERE 10 > 9 -- logical expression 1
AND 10 > 8 -- logical expression 2
AND 10 > 7 -- logical expression 3
-- I can rephrase this with ALL
-- with the values in a column
SELECT 1
WHERE 10 > ALL (SELECT tempfield
FROM (VALUES
(11),
(12),
(7)
) AS tempTable(tempfield))
-- and get it to be true with a change to the values
SELECT 1
WHERE 10 > ALL (SELECT tempfield
FROM (VALUES
(9),
(8),
(7)
) AS tempTable(tempfield))
-- so you can think of ANY and ALL
-- as comparison operator modifiers
-- comparison operators usually take a scalar value on each side
-- scalar <operator> scalar
-- 3 < 7
-- 3 > 7
-- 3 = 7
-- 3 != 7
-- 3 <> 7
-- 3 <= 7
-- 3 >= 7
-- ANY and ALL take a scalar on the left, and a column on the right
-- scalar ANY column
-- scalar ALL column
-- 3 < ANY ((1),(2),(3))
-- 3 >= ALL ((1),(2),(3))
-- and the column is generally created with a SELECT statement
-- scalar ANY (SELECT FROM...)
-- scalar ALL (SELECT FROM...)
-- so here's a more concrete example using the Northwind database Orders table
-- suppose we want a list of customers who paid more than
-- $200 in freight on at least one order in 1996
-- in other words, $200 < ANY (freight values on their 1996 orders)
SELECT CompanyName
FROM Customers
WHERE $200 < ANY (SELECT freight
FROM Orders
WHERE Orders.CustomerID = Customers.CustomerID
AND YEAR(Orderdate) = 1996)
ORDER BY CompanyName
-- notice that this is a correlated subquery
-- it refers to the outer query (Customers.customerID)
-- also notice that this could be rephrased
-- with a JOIN and a DISTINCT
SELECT DISTINCT Customers.CompanyName
FROM Customers
JOIN Orders ON Customers.CustomerID = Orders.CustomerID
WHERE Orders.Freight > $200
AND YEAR(Orders.OrderDate) = 1996
ORDER BY CompanyName
-- is there a difference? why use one and not the other?
-- it depends
-- depending on your situation, one SQL phrasing might
-- be clearer than another
-- depending on your data model, volume, statistics, indexes, etc.
-- one might be faster than the other
-- in other words, you might get different query plans
-- in general, I recommend writing your SQL in the clearest
-- manner possible
-- then carefully rephrasing to a better performing, but
-- perhaps less understandable, form if there is a performance issue
-- in summary,
-- ANY and ALL modify comparison operators
-- they succinctly compare a single scalar to
-- a column of scalars
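-- one edge case worth knowing (added sketch, not covered in the video):
-- when the subquery returns *no* rows,
-- ALL is true (nothing can fail the comparison)
-- and ANY is false (nothing can satisfy it)
SELECT 1 AS [ALL over an empty set is true]
WHERE 10 > ALL (SELECT tempfield
                FROM (VALUES (11),(12),(7)) AS tempTable(tempfield)
                WHERE tempfield > 100) -- matches no rows
SELECT 1 AS [ANY over an empty set is false]
WHERE 10 > ANY (SELECT tempfield
                FROM (VALUES (11),(12),(7)) AS tempTable(tempfield)
                WHERE tempfield > 100) -- matches no rows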
-- thanks for watching!
-- Database by Doug
-- Douglas Kline
-- 10/19/2018
-- ANY and ALL
Monday, October 15, 2018
Using the CAST() function in SQL
-- Database by Doug
-- Douglas Kline
-- 10/10/2018
-- CAST - converting to a new data type
-- sometimes you need to change one data type to another data type
-- consider this:
SELECT '4.0'
-- I might want to deal with this as a number
-- for example
SELECT '4.0' + 2.0
-- this works
-- even though '4.0' is a varchar
-- and 2.0 is a floating point number
-- the db does an *implicit* conversion
-- of the '4.0' to a floating point
-- then does the addition
-- and returns a floating point
-- we hope the db "knows what I mean"
-- and we are *assuming* it ends up doing the right thing
-- for simple things, this mostly works
-- the db is pretty smart
-- however, high quality code doesn't normally
-- make assumptions, so let's be *explicit*
SELECT CAST('4.0' AS float) + 2.0
-- here, I'm using the CAST function to
-- *explicitly* change a varchar to a float
-- I'm not relying on the db "knowing what I mean"
-- you can generally CAST between data types
-- fairly freely
-- see the full matrix of allowable conversions
-- for SQL Server 2017
-- here: https://docs.microsoft.com/en-us/sql/t-sql/data-types/data-type-conversion-database-engine?view=sql-server-2017
-- note that some to/from conversions are implicit / automatic
-- some are not allowed at all
-- and some require explicit CASTs
-- here are a few common conversions you might want to do
-- converting numeric and dates to varchars
-- especially when needing to concatenate
SELECT 'Doug' + 1 AS [trying for Doug1] -- error
SELECT 1 + 'Doug' AS [trying for 1Doug] -- error
-- and the fixes
SELECT 'Doug' + CAST(1 AS VARCHAR)
SELECT CAST(1 AS VARCHAR) + 'Doug'
SELECT GETDATE() + 'Doug' -- error
SELECT CAST(GETDATE() AS VARCHAR) + 'Doug'
-- forcing specific types of operations
-- consider this
SELECT 3 / 2
-- notice that I get *integer* division
-- because both operands are integers
-- but what if I want to see 1.5 as the result?
-- I can fix literals easy enough
SELECT 3.0 / 2.0
-- but what about this:
SELECT UnitsInStock / unitsonorder
FROM Products
WHERE unitsOnorder <> 0
-- I'm getting integer division
-- here's how to get floating point division:
SELECT CAST(UnitsInStock AS FLOAT) / CAST(unitsonorder AS FLOAT)
FROM Products
WHERE unitsOnorder <> 0
-- another common conversion is from varchar to date/time
-- see the differences here:
SELECT '4/8/2018'
SELECT CAST('4/8/2018' AS DATE)
SELECT CAST('4/8/2018' AS DATETIME2)
-- and here:
SELECT '20180408 11:00'
SELECT CAST('20180408 11:00' AS DATE)
SELECT CAST('20180408 11:00' AS DATETIME2)
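-- a related note (added sketch, not in the original script):
-- if the string can't be converted, CAST raises an error;
-- TRY_CAST (available in SQL Server 2012 and later) returns NULL instead,
-- which can help when cleaning up messy character data
SELECT TRY_CAST('4/8/2018' AS DATE)   AS [converts fine],
       TRY_CAST('not a date' AS DATE) AS [NULL instead of an error]
-- SELECT CAST('not a date' AS DATE) -- this one would raise a conversion error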
-- summary
-- for 'throw-away' code, the db will implicitly convert
-- for high quality code in a system
-- you should not rely on implicit conversions
-- instead, use CAST
-- Database by Doug
-- Douglas Kline
-- 10/10/2018
-- CAST - converting to a new data type
Wednesday, October 10, 2018
Using DISTINCT in SQL
-- Database by Doug
-- Douglas Kline
-- 10/9/2018
-- DISTINCT - removing duplicates, but in a "dumb" way
-- consider this
SELECT ProductID,
ProductName,
SupplierID
FROM Products
-- this is a list of all products
-- note the repeats in the SupplierID
-- suppose I want to know the list of suppliers
-- in the Products table
SELECT SupplierID
FROM Products
-- again, notice the repeats
-- to remove the repeats, I can do this:
SELECT DISTINCT SupplierID
FROM Products
-- you might be saying, why not just do this:
SELECT SupplierID
FROM Suppliers
-- my response is: that's a different list
-- it's the list of all suppliers in the supplier table
-- What I'm looking for is the list of all suppliers
-- in the products table
-- in other words, all suppliers which we actually *use*
-- Let's look at another example, with a table I've created
SELECT firstName
FROM Person
ORDER BY firstName
-- this is a list of all people's first names
-- as we scroll down, we'll start to see repeats
-- in other words, multiple people have the same first name
-- now let's say we want a list of all distinct firstnames
SELECT DISTINCT firstName
FROM Person
ORDER BY firstName
-- so now there isn't a record returned for every Person record
-- there's a record returned for every unique firstname
-- also note that NULL shows up as one of the distinct firstname values
-- you might say, why not use Group By to do this?
-- like this:
SELECT firstName
FROM Person
GROUP BY firstName
ORDER BY firstName
-- logically, it returns the exact same records, and always will
-- however, GROUP BY does a lot more work
-- it actually sets up groups of records in preparation to
-- calculate aggregates like SUM, COUNT, AVG, etc.
-- DISTINCT is much faster
-- if it sees a value it has seen before, it just throws it out
-- in other words, it doesn't group the records
-- it just makes a list of unique values
-- so, don't use GROUP BY when what you really need is DISTINCT
-- alright, what if you want to count stuff?
SELECT COUNT(ID) AS [# of people],
COUNT(DISTINCT ID) AS [# of distinct primary key values],
COUNT(firstName) AS [# of people with non-NULL firstnames],
COUNT(DISTINCT firstName) AS [# of distinct firstNames]
FROM Person
-- note that the first two values are always the same,
-- since primary key values are distinct aka unique
-- and also notice that there are 599 distinct first names, but recall
SELECT DISTINCT firstName
FROM Person
ORDER BY firstName
-- so why is the COUNT(DISTINCT firstname) = 599
-- but DISTINCT firstname gives 600 records?
-- remember that COUNT counts non-NULL values
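-- here's a small self-contained illustration of that
-- (added sketch, not in the original script),
-- using a literal table with a repeated name and a NULL
SELECT COUNT(firstName)          AS [non-NULL values],   -- 2
       COUNT(DISTINCT firstName) AS [distinct non-NULL], -- 1
       COUNT(*)                  AS [all rows]           -- 3
FROM (VALUES ('Ann'), ('Ann'), (NULL)) AS tempTable(firstName)
-- while SELECT DISTINCT firstName on the same data
-- returns two rows: 'Ann' and NULL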
-- finally, DISTINCT is sort of "dumb", in that it doesn't
-- know anything about primary keys
-- or anything about the underlying table(s)
-- it only considers values from the fields you provide
-- consider this:
SELECT DISTINCT firstname
FROM Person
-- it doesn't give distinct Person records, just distinct firstnames
-- now this:
SELECT DISTINCT lastname
FROM Person
-- and the distinct applies to the *combination*
-- of all the fields in the SELECT clause
-- in this example,
-- all distinct *combinations* of gender and firstname are shown
SELECT DISTINCT gender,
firstname
FROM Person
ORDER BY gender,
firstname
-- in summary,
-- distinct removes duplicates
-- it removes duplicates based on all fields in the SELECT list
-- when used with COUNT, it will not count duplicate values
-- thanks for watching!
-- Database by Doug
-- Douglas Kline
-- 10/9/2018
-- DISTINCT - removing duplicates, but in a "dumb" way
Thursday, October 4, 2018
Numeric Expressions in SQL
-- Database by Doug
-- Douglas Kline
-- 8/28/2018
-- Numeric Expressions - expressions that return a numeric data type
-- beginner level
-- simple numeric expression examples
-- and things to keep in mind
-- I might say 'db'
-- which is shorthand for 'database engine'
-- this is the server software that interprets
-- your SQL and performs actions
-- you can ask your database engine (db) to be your calculator:
SELECT 1 + 2
-- if you are familiar with spreadsheets
-- and how to type formulas
-- it is very similar
SELECT 1 + 2
-- an expression is something that the db
-- will read, translate, and perform
-- so in the expression above
-- it reads a 1, then a plus sign, then a 2
-- interprets it as addition
-- performs the addition
-- and returns the results
--------------------------------------------------
SELECT 1 + 2
-- there are certain items in the expression
-- the plus sign (+) is an *operator*
-- more specifically, it is a *binary* operator
-- it defines an operation on two items (thus binary)
-- more specifically, the items on its left and right
-- the plus sign says to the db:
-- take the items on my left and right and add them
-- it is an instruction to the db
SELECT 1 + 2
-- the 1 and 2 are the *operands* of the operator
-- they are the items that the operator applies to
-- the 1 and 2 are also examples of *literals*
-- in other words 1 literally means the number 1
-- in coding, a literal means: not code, just a value
-- here is an example of a unary operator (one operand)
SELECT -5
-- here, the literal value 5
-- is modified to become negative
-- by the minus sign operator
------------------------------------------------------------
-- how does the db know what *is* an expression, and what *isn't*
-- in this example
SELECT 1 + 2
-- the db is *expecting* an expression to be in this position
-- (after the word SELECT)
-- so expressions are mainly known by their
-- location/position in your SELECT statement
---------------------------------------------------------
-- notice that the usual order of operations is followed
-- * and / come before + and -
-- otherwise it is left-to-right,
-- unless you add parentheses
-- then sub-expressions in parentheses are evaluated first
SELECT 1 + 2 * 6
SELECT (1 + 2) * 6
SELECT (3 - 1) / 2
------------------------------------------------------
-- notice that operators sometimes change meaning
-- depending on their context
SELECT 1 / 3 -- does integer division
SELECT 1.0 / 3.0 -- floating point division
SELECT 1 / 3.0 -- floating point division
SELECT 1.0 / 3 -- floating point division
-- if both operands (items on either side of the operator)
-- are whole numbers, do integer division
-- otherwise do floating point division
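-- a quick way to confirm this (added sketch, using SQL_VARIANT_PROPERTY,
-- which also appears later in this script):
SELECT SQL_VARIANT_PROPERTY(1 / 3, 'BaseType')   AS [both operands whole numbers], -- int
       SQL_VARIANT_PROPERTY(1.0 / 3, 'BaseType') AS [one decimal operand]          -- numeric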
-----------------------------------------------------------------------------
-- here's an operator that might be new to you: %
SELECT 5 % 3 -- modulus operator - gives the remainder after integer division
SELECT 8 % 3 -- same answer
SELECT (8 / 3) * 3 + 8 % 3 -- equals 8
-----------------------------------------------------------------------------
-- what if your data is not a literal?
-- in other words, it comes out of the db
SELECT unitsInstock -- this is an expression, just very simple
FROM Products
SELECT unitsInStock + unitsOnOrder -- effective inventory
FROM Products
-- plus is still the operator
-- the operands are UnitsInStock and unitsOnOrder
-- the expression is evaluated on each record, separately
-----------------------------------------------------
-- but what if the data type is not right for
-- the operations I want to do?
-- specifically, the data is whole numbers
-- but I want to do floating point division
SELECT unitsInStock,
reOrderLevel,
unitsinstock / reOrderLevel
FROM Products
WHERE reOrderLevel <> 0 -- to avoid division by zero
-- but I'd really like to see the results as a floating point/decimal
-- you have to convert *prior* to dividing
SELECT unitsInStock,
reOrderLevel,
CONVERT(real,unitsinstock) / reOrderLevel
FROM Products
WHERE reOrderLevel <> 0 -- to avoid division by zero
-- the unitsInStock is converted to a real (aka floating point number)
-- before it gets divided
-- since one of the operands
-- is a floating point number, it does floating point division
-------------------------------------------------------------
-- perhaps a more realistic and useful expression
SELECT ProductID,
ProductName,
unitsInStock * unitprice AS [Dollar Value in Inventory]
FROM Products
ORDER BY ProductID
-- note that unitprice is a money data type,
-- and the unitsInstock is a whole number
-- so the result is shown as a money data type
-- (note the 2 digits to the right of the decimal)
-- we can show this explicitly:
SELECT SQL_VARIANT_PROPERTY(unitPrice, 'BaseType'),
SQL_VARIANT_PROPERTY(unitsInStock, 'BaseType'),
SQL_VARIANT_PROPERTY(unitsInStock * unitPrice, 'BaseType')
FROM Products
-----------------------------------------------------------------------
-- in summary
-- expressions are instructions for the db to create new values
-- expressions are known by their location in the statement
-- expressions have operators and operands
-- Database by Doug
-- Douglas Kline
-- 8/28/2018
-- Numeric Expressions
String Expressions in SQL
-- Database by Doug
-- Douglas Kline
-- 8/28/2018
-- Character Expressions - expressions that return character data
-- beginner level
-- might want to check out numeric expressions
------------------------------------------------------------------
-- consider this statement
SELECT 'Fred',
'Flintstone'
-- note that the single quotes are not shown in
-- the table results
-- this is because they are not *part of* the data
-- they are used to mark the beginning and end of the data
-- and are called *delimiters*
-- so the statement has two pieces of data
-- delimited by single quotes
SELECT 'Fred',
'Flintstone'
-- the *type* of data is *character* data
-- more specifically, a *sequence of characters*
-- in programming, they would be called *strings*
-- another term might be *string literals*
-- now look at this:
SELECT 123 AS [numeric data type],
'123' AS [character data type]
-- the first column is a number
-- the second column is a sequence of characters
-- first the character '1', then the character '2', ...
-- now consider this
SELECT 1 + 5 AS [numeric],
'1' + '5' AS [character]
-- see how the plus sign means something different
-- depending on what's on either side?
-- with character data, the plus sign means *concatenate*
-- or 'put together'
-- so the basic operator for character expressions
-- is the plus sign
-- we can prove this:
SELECT SQL_VARIANT_PROPERTY(1 + 5, 'BaseType'),
SQL_VARIANT_PROPERTY('1' + '5', 'BaseType')
-- here's another example:
SELECT 'Fred' + ' ' + 'Flintstone' AS [Full Name]
-------------------------------------------------------------
-- there's lots more we want to do with
-- character data
-- but we need functions....
-- here are a few simple ones:
SELECT LEN (' Fred'), -- find how many chars - note the spaces
UPPER(' Fred'), -- convert all chars to upper case
LOWER(' Fred'), -- convert all chars to lower case
LEFT (' Fred', 2), -- return just the left-most 2 chars
RIGHT(' Fred', 2), -- return just the right-most 2 chars
LTRIM(' Fred') -- get rid of any space chars on left
-- you might also want to find the location of a certain character
SELECT CHARINDEX(' ', 'Fred Flintstone') AS [location of space character]
-- and also, you can combine these functions into a more complex expression
SELECT 'Fred Flintstone' AS [original full name],
RIGHT('Fred Flintstone', -- right-most 10 characters
LEN('Fred Flintstone') -- the number 15
- CHARINDEX(' ', 'Fred Flintstone') -- the number 5
) AS [last name only]
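-- and going the other direction (added sketch, same idea):
-- everything *before* the space is the first name
SELECT 'Fred Flintstone' AS [original full name],
       LEFT('Fred Flintstone',                    -- left-most 4 characters
            CHARINDEX(' ', 'Fred Flintstone') - 1 -- the number 5, minus 1
       ) AS [first name only]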
---------------------------------------------------------------------------------
-- what about data from a table - not literals
SELECT ProductName,
LEN(ProductName) AS [length of name],
UPPER(ProductName) AS [uppercase],
CHARINDEX(' ', ProductName),
RIGHT(ProductName, LEN(ProductName) - CHARINDEX(' ', ProductName))
FROM Products
---------------------------------------------------------------------------------
-- in summary
-- string literals are delimited by single quotes
-- plus sign means concatenate
-- functions help with other tasks
-- Database by Doug
-- Douglas Kline
-- 8/28/2018
-- Character Expressions
Primary Keys in Relational Databases
-- Database by Doug
-- Douglas Kline
-- 9/28/2018
-- Primary Keys
USE Northwind
-- note that I'm starting out with a
-- modified version of the Northwind.Categories table
-- called DBBDCategories
-- for demo purposes
-- Primary Keys
-- a primary key has two purposes:
----   to guarantee access to every record in a table
----   to uniquely distinguish a record from all other records in the table
-- this is done by requiring a primary key to be:
----   NOT NULL
----   UNIQUE across all records in that table
-- the NOT NULL makes sure that *at least one* column is NOT NULL
-- and that there is a WHERE clause that will match on every record
-- for example:
--    WHERE categoryID = 7
-- will match every category with a categoryID of 7 --
-- there is no record with an *unknown* ID that *might be* 7
-- the UNIQUE makes sure that you can make a request of records
-- that guarantees at most one record
-- for example:
--    WHERE categoryID = 7
-- guarantees one of:
--    a *single* record - the category with a categoryID of 7
--    no category - there is definitively no record with a categoryID of 7
--***************************************************
-- tables don't absolutely require a primary key
-- however, guaranteed, unique access is extremely helpful in many situations
-- (particularly Online Transaction Processing systems / OLTPs)
-- AND, there can be significant performance benefits to having a PK
-- (for certain operations)
-- Consider my somewhat modified DBBDCategories table from Northwind
SELECT *
FROM DBBDCategories
-- I happen to know that CategoryID is the primary key of this table
-- but how would you know otherwise?
-- you could look in the object inspector on the left
-- expand the database, the tables, the Categories table, the Columns
-- note the key icon on the left of the CategoryID, and the 'PK' in parentheses
-- (demo)
-- another way would be to put the table into a diagram
-- note the yellow key icon next to the CategoryID
-- (demo)
-- Finally, a really definitive way would be to script out the table
-- and see the full definition of the table in SQL
-- (demo)
-- here is a slightly abbreviated/edited version of
-- the SQL CREATE statement for the DBBDCategories table:
--CREATE TABLE [dbo].[DBBDCategories]
--(
--    [CategoryID]   [int]          NOT NULL,
--    [CategoryName] [nvarchar](15) NOT NULL,
--    [Description]  [ntext]        NULL,
--    [Picture]      [image]        NULL,
--    CONSTRAINT [PK_DBBDCategories] PRIMARY KEY CLUSTERED
--    (
--        [CategoryID] ASC
--    )
--)
-- note the NOT NULL for CategoryID
-- this means that every insert and update will be checked to make sure
-- that the CategoryID does not become NULL
-- let's try it
-- demo: insert new NULL PK
-- demo: update PK to a NULL
-- note the PRIMARY KEY specified as a CONSTRAINT
-- this means that every insert and update will be checked to make sure
-- that no two CategoryIDs, in the WHOLE TABLE, get duplicated
-- let's try it
-- demo: insert duplicate PK
-- demo: update PK to a duplicate value
-- let's review
-- we are guaranteed that no record in the categories table has an unknown CategoryID
-- we are guaranteed that no two records in the categories table have the same CategoryID
-- in other words
--    WHERE CategoryID = 8
-- is guaranteed to return either no record, or one record
-- and we are guaranteed that there is no record
-- with an *unknown* CategoryID which is actually 8
-- so what does a categoryID of 8 actually mean?
--....?
--....?
-- NOTHING!!
-- primary keys don't *need* to mean anything
-- they just have to be UNIQUE and NOT NULL
-- now let's look at the definition of
-- the *actual* Categories table from the Northwind DB
--CREATE TABLE [dbo].[Categories]
--(
--    [CategoryID]   [int] IDENTITY(1,1) NOT NULL, -- ** note IDENTITY(1,1)
--    [CategoryName] [nvarchar](15)      NOT NULL,
--    [Description]  [ntext]             NULL,
--    [Picture]      [image]             NULL,
--    CONSTRAINT [PK_Categories] PRIMARY KEY CLUSTERED -- ** note PRIMARY KEY
--    (
--        [CategoryID] ASC
--    )
--) ON [PRIMARY]
-- note the PRIMARY KEY in place of UNIQUE
-- these are interchangeable, they mean the same thing
-- however, also note the IDENTITY(1,1)
-- this is an example of defining a *synthetic* primary key
-- we are relinquishing to the DB engine
-- the task of providing PK values for this column
-- specifically, the first PK value should be 1
-- the second PK value should be 1 + 1
-- the third PK value should be 2 + 1
-- the fourth PK value should be 3 + 1
-- in other words, begin with 1, and increment by 1 each time
-- this also means that NO ONE ELSE, besides the DB engine,
-- can change the value of Categories.CategoryID
-- (demo) try to change a PK value
-- (demo) try to insert a PK value
-- also, IDENTITY values are never re-used
-- this guarantees uniqueness across the live table and all archives
-- for example, if we archive CategoryID = 7 (move it to another place)
-- and then end up reusing CategoryID = 7 for another record,
-- those two records could be confused for each other
-- (demo) create a new record, delete it, show next PK value, skipped
-- why is this good?
-- HUMANS might accidentally re-use archived PK values
-- also, columns that *mean* something may need to be changed
-- a synthetic key does exactly what is needed, and no more
-- internal to the db engine, this can significantly improve performance
-- in summary,
-- primary keys need to be UNIQUE and NOT NULL, and nothing more
-- this guarantees unique access to each record
-- *synthetic* keys can be helpful in keeping all key values UNIQUE
-- this is especially helpful in OLTP type systems
-- foreshadowing ...
-- primary keys also are referenced by FOREIGN KEYS
-- see the video on FOREIGN KEYS!
-- thanks for watching!
-- Database by Doug
-- Douglas Kline
-- 9/28/2018
-- Primary Keys
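-- for reference, here is a sketch of the demo statements mentioned in the
-- Primary Keys script above (added for clarity, not in the original;
-- it assumes the DBBDCategories table as defined there,
-- with CategoryID values 1 and 2 already in use)
INSERT INTO DBBDCategories (CategoryID, CategoryName)
VALUES (NULL, 'No ID')       -- fails: CategoryID is NOT NULL
INSERT INTO DBBDCategories (CategoryID, CategoryName)
VALUES (1, 'Duplicate ID')   -- fails: violates PRIMARY KEY constraint PK_DBBDCategories
UPDATE DBBDCategories
SET CategoryID = 1
WHERE CategoryID = 2         -- fails: would duplicate an existing CategoryID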
Foreign Keys in Relational Databases
-- Database by Doug
-- Douglas Kline
-- 10/1/2018
-- Foreign Keys
USE Northwind
-- Foreign Keys
-- a field in one table (table A)
-- that references a primary key value in another table (table B)
-- for example, the Categories and Products table from Northwind
-- (show diagram)
-- in this case, the foreign key is
-- Products.CategoryID, i.e., the CategoryID field in Products
-- from a practical usage standpoint
-- the foreign key is a "lookup"
-- consider this:
SELECT ProductID,
ProductName,
CategoryID
FROM Products
SELECT CategoryID,
CategoryName
FROM Categories
-- note how I can use the Products.CategoryID
-- value to find the CategoryName of the right Category
-- note that Primary Keys are UNIQUE and NOT NULL
-- e.g., Products.ProductID, Categories.CategoryID
-- but Foreign Keys can be NOT UNIQUE and NULL
-- e.g., Products.CategoryID
-- in this situation, many products can be
-- in the same category
-- and a product could have an unknown category
-- (products can exist without a category)
-- in a sense, FKs define a "relationship"
-- between two records
-- in this case, each product record is related
-- to a category record
-- (or not, in the case of NULL FK)
-- to be able to "lookup" from one table to another
-- you just need two fields of similar data types
-- however, what if you want to make sure
-- that the lookup values are always *good*
-- in other words, that values in
-- Products.CategoryID are actually over in
-- the Categories table?
-- this requirement, that FK values are valid
-- is called *referential integrity*
-- note that this is a constraint on
-- the Products.CategoryID field
-- it must be a valid value that
-- exists over in the Categories.CategoryID field
-- let's see it in the definition of Products
CREATE TABLE [dbo].[Products]
(
[ProductID] [int] IDENTITY(1,1) NOT NULL,
[ProductName] [nvarchar](40) NOT NULL,
[CategoryID] [int] NULL,
-- other fields omitted for clarity
CONSTRAINT [PK_Products]
PRIMARY KEY CLUSTERED
(
[ProductID] ASC
),
CONSTRAINT [FK_Products_Categories]
FOREIGN KEY
(
[CategoryID]
)
REFERENCES [dbo].[Categories] ([CategoryID])
)
-- note the REFERENCES section
-- note the allowed NULL for CategoryID
-- this means that on any UPDATE or INSERT to Products
-- a Product record must always have a valid CategoryID
-- or a NULL value (unknown)
-- (demo this)
-- it also means that on any UPDATE or DELETE to Categories
-- all Product records must always have a valid CategoryID
-- or a NULL value (unknown)
-- (demo this)
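-- a sketch of those demos (added for clarity, not in the original script;
-- CategoryID 999 is assumed not to exist in Categories, CategoryID 1 is assumed
-- to be referenced by at least one Product, and the other Products columns
-- are assumed to have defaults or allow NULL, as in the standard Northwind script)
INSERT INTO Products (ProductName, CategoryID)
VALUES ('Mystery Widget', 999)  -- fails: violates FK_Products_Categories
INSERT INTO Products (ProductName, CategoryID)
VALUES ('Mystery Widget', NULL) -- succeeds: NULL is allowed for this FK
DELETE FROM Categories
WHERE CategoryID = 1            -- fails: Products still reference this Category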
-- If we have referential integrity for all Product records
-- then we can be confident that our JOINs between the two tables
-- provide valid results
SELECT Products.ProductID,
Products.ProductName,
Products.CategoryID,
Categories.CategoryID,
Categories.CategoryName
FROM Products
JOIN Categories ON Products.CategoryID = Categories.CategoryID
ORDER BY Products.ProductID
-- JOINs are a way of doing multiple "lookups"
-- of FK values to PK values
-- but that's another video...
-- in summary,
-- foreign keys provide a "lookup" mechanism
-- for records in two different tables
-- this also defines how the two tables are "related"
-- *referential integrity* enforces "good" foreign keys
-- in other words, a FK should always have a valid value
-- FK values can repeat - be non-UNIQUE
-- FK values can be NULL - unknown
-- thanks for watching!
-- Database by Doug
-- Douglas Kline
-- 10/1/2018
-- Foreign Keys