Data Warehouse: Create Date Dimension (version 1)


Data Warehouse is “a system used for reporting and data analysis”. In general it is built by dim (dimension) and fact tables (structure and data tables). Here I create a stored procedure to build Date Dimension. Now it builds only calendar data and will be expanded to include “fiscal” data and “offset” from a specific date (today or the last business day).

Let’s say I have a server, dedicated for ETL. I have a schema “etl” for storing objects during the run of the ETL processes and “dw” for storing the actual Data Warehouse. In a production environment I’ll have much more objects (Detailed Log table, Job Agent Schedules, SSIS packages, etc.), but for this example I will simplify the scenario and will focus on dimDate.

Create database.

USE [master];
GO

IF (DB_ID('PL') IS NOT NULL)
	BEGIN
		ALTER DATABASE PL SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
		DROP DATABASE PL;
	END
GO

CREATE DATABASE PL;
GO

USE PL;
GO

CREATE SCHEMA dw;
GO

CREATE SCHEMA etl;
GO

CREATE TABLE PL.etl.[Log]
	(
		[DateTime] DATETIME
		, ETLSource SYSNAME
		, [Action] NVARCHAR(9) -- Start, End, Execution, Error
		, ExecutionID UNIQUEIDENTIFIER
		, ExecutedBy NVARCHAR(128)
	);
GO

Create stored procedure to build Date Dimension.

The parameters I pass are:

  • FirstDayOfWeek –> the definition of a “week” differs in different cultures. Sunday in North America, Monday in Europe etc.
  • YearsPast –> how many full past years will be included in dw.dimDate table
  • YearsFuture –> how many full future years will be included in dw.dimDate table

The (innermost) level is day or one line represents a data for one day.

SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

-- =============================================
-- Author:		Peter Lalovsky
-- Create date: 2015-02-05
-- Description:	Create Date Dimension in Data Warehouse (version 1)
-- =============================================

CREATE PROCEDURE etl.spDimDate1
	@FirstDayOfWeek VARCHAR(9) = 'Monday' -- Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday 
	, @YearsPast TINYINT = 5 -- Full past years from the current year
	, @YearsFuture TINYINT = 2 -- Full future years from the current year
AS
BEGIN -- etl.dimDate1

SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SET NOCOUNT ON;

DECLARE @ExecutionID UNIQUEIDENTIFIER
SET @ExecutionID = NEWID();

-- Log --> Start
INSERT PL.etl.[Log] ([DateTime], ETLSource, [Action], ExecutionID, ExecutedBy)
SELECT GETDATE(), @@PROCID, 'Start', @ExecutionID, SUSER_SNAME();

-- @FirstDayOfWeek to @DateFirst - Start
DECLARE @DateFirst TINYINT;
SELECT @DateFirst =
	CASE @FirstDayOfWeek
		WHEN 'Monday' THEN 1
		WHEN 'Tuesday' THEN 2
		WHEN 'Wednesday' THEN 3
		WHEN 'Thursday' THEN 4
		WHEN 'Friday' THEN 5
		WHEN 'Saturday' THEN 6
		WHEN 'Sunday' THEN 7
	END;

SET DATEFIRST @DateFirst;
-- @FirstDayOfWeek to @DateFirst - End

--** Variables - Start **--
DECLARE
	@DateStart DATETIME
	, @DateEnd DATETIME
	, @DateLoop DATETIME;

SELECT
	@DateStart = CAST((YEAR(CONVERT(CHAR(10), GETDATE(), 120)) - @YearsPast) AS CHAR(4)) + '-01-01'
	, @DateEnd = CAST((YEAR(CONVERT(CHAR(10), GETDATE(), 120)) + @YearsFuture) AS CHAR(4)) + '-12-31'
	, @DateLoop = @DateStart;
--** Variables - End **--

--** PL.etl.dimDate1 - Start **--
IF (OBJECT_ID('PL.etl.dimDate1', 'U') IS NOT NULL)
	BEGIN TRUNCATE TABLE PL.etl.dimDate1; END
ELSE
	BEGIN
		CREATE TABLE PL.etl.dimDate1
			(
				[Key] INT NOT NULL
				, [Date] DATETIME
				, IsWeekend BIT
				, DayOfWeekLiteral VARCHAR (9) COLLATE Latin1_General_CI_AS
				, DayOfWeekShortLiteral CHAR (3) COLLATE Latin1_General_CI_AS
				, YearCalendar SMALLINT
				, QuarterCalendar TINYINT
				, MonthCalendar TINYINT
				, MonthLiteralCalendar VARCHAR (9) COLLATE Latin1_General_CI_AS
				, MonthLiteralShortCalendar CHAR (3) COLLATE Latin1_General_CI_AS
				, WeekOfYearCalendar TINYINT
				, WeekOfQuarterCalendar TINYINT
				, WeekOfMonthCalendar TINYINT
				, DayOfYearCalendar SMALLINT
				, DayOfQuarterCalendar TINYINT
				, DayOfMonthCalendar TINYINT
				, DayOfWeekCalendar TINYINT
				, BusinessDayOfYearCalendar SMALLINT
				, BusinessDayOfQuarterCalendar TINYINT
				, BusinessDayOfMonthCalendar TINYINT
				, BusinessDayOfWeekCalendar TINYINT
			);

		ALTER TABLE PL.etl.dimDate1
		ADD CONSTRAINT PK_etlDimDate1 PRIMARY KEY ([Key]);
	END

WHILE (@DateLoop <= @DateEnd)
BEGIN
	INSERT PL.etl.dimDate1
		(
			[Key]
			, [Date]
			, DayOfWeekLiteral
			, DayOfWeekShortLiteral
			, IsWeekend	
			, YearCalendar
			, QuarterCalendar
			, MonthCalendar
			, MonthLiteralCalendar
			, MonthLiteralShortCalendar
			, WeekOfYearCalendar
			, DayOfYearCalendar
			, DayOfMonthCalendar
			, DayOfWeekCalendar
		)
	SELECT
		CAST(CONVERT(CHAR(8), @DateLoop, 112) AS INT) -- [Key]
		, @DateLoop -- [Date]
		, DATENAME(DW, @DateLoop) -- DayOfWeekLiteral
		, LEFT(DATENAME(DW, @DateLoop), 3) -- DayOfWeekShortLiteral
		, CASE WHEN DATENAME(DW, @DateLoop) IN ('Saturday', 'Sunday') THEN 1 END -- IsWeekend
		, YEAR(@DateLoop) -- YearCalendar
		, DATEPART(QQ, @DateLoop) -- QuarterCalendar
		, MONTH(@DateLoop) -- MonthCalendar
		, DATENAME(MM, @DateLoop) -- MonthLiteralCalendar
		, LEFT(DATENAME(MM, @DateLoop), 3) -- MonthLiteralShortCalendar
		, DATEPART(WK, @DateLoop) -- WeekOfYearCalendar
		, DATEPART (DY, @DateLoop) -- DayOfYearCalendar
		, DAY(@DateLoop) -- DayOfMonthCalendar
		, DATEPART(DW, @DateLoop) -- DayOfWeek
	
	SET @DateLoop = DATEADD(DD, 1, @DateLoop);
END

-- Update Calendar Data - Start
UPDATE PL.etl.dimDate1
SET
	WeekOfQuarterCalendar = H.WQC
	, WeekOfMonthCalendar = H.WMC
	, DayOfQuarterCalendar = H.DQC
FROM
	(
		SELECT
			[Key] AS K
			, DENSE_RANK() OVER (PARTITION BY YearCalendar, QuarterCalendar ORDER BY WeekOfYearCalendar) AS WQC
			, DENSE_RANK() OVER (PARTITION BY YearCalendar, MonthCalendar ORDER BY WeekOfYearCalendar) AS WMC
			, ROW_NUMBER() OVER (PARTITION BY YearCalendar, QuarterCalendar ORDER BY DayOfYearCalendar) AS DQC
		FROM PL.etl.dimDate1
	) AS H
WHERE [Key] = H.K;
-- Update Calendar Data - End

-- Update Calendar Business days Data - Start
UPDATE PL.etl.dimDate1
SET
	BusinessDayOfYearCalendar = H.BDYC
	, BusinessDayOfQuarterCalendar = H.BDQC
	, BusinessDayOfMonthCalendar = H.BDMC
	, BusinessDayOfWeekCalendar = H.BDWC
FROM
	(
		SELECT
			[Key] AS K
			, DENSE_RANK() OVER (PARTITION BY YearCalendar ORDER BY DayOfYearCalendar) AS BDYC
			, DENSE_RANK() OVER (PARTITION BY YearCalendar, QuarterCalendar ORDER BY DayOfYearCalendar) AS BDQC
			, ROW_NUMBER() OVER (PARTITION BY YearCalendar, MonthCalendar ORDER BY DayOfYearCalendar) AS BDMC
			, ROW_NUMBER() OVER (PARTITION BY YearCalendar, WeekOfYearCalendar ORDER BY DayOfYearCalendar) AS BDWC
		FROM PL.etl.dimDate1
		WHERE IsWeekend IS NULL
	) AS H
WHERE [Key] = H.K;
-- Update Calendar Business days Data - End
--** PL.etl.dimDate1 - Start **--

--** Insert into dw.dimDate1 - Start **--
IF (OBJECT_ID('PL.dw.dimDate1', 'U') IS NOT NULL)
	BEGIN TRUNCATE TABLE PL.dw.dimDate1; END
ELSE
	BEGIN
		CREATE TABLE PL.dw.dimDate1
			(
				ETLDateTime DATETIME
				, [Key] INT NOT NULL
				, [Date] DATETIME
				, IsWeekend BIT
				, DayOfWeekLiteral VARCHAR (9) COLLATE Latin1_General_CI_AS
				, DayOfWeekShortLiteral CHAR (3) COLLATE Latin1_General_CI_AS
				, YearCalendar SMALLINT
				, QuarterCalendar TINYINT
				, MonthCalendar TINYINT
				, MonthLiteralCalendar VARCHAR (9) COLLATE Latin1_General_CI_AS
				, MonthLiteralShortCalendar CHAR (3) COLLATE Latin1_General_CI_AS
				, WeekOfYearCalendar TINYINT
				, WeekOfQuarterCalendar TINYINT
				, WeekOfMonthCalendar TINYINT
				, DayOfYearCalendar SMALLINT
				, DayOfQuarterCalendar TINYINT
				, DayOfMonthCalendar TINYINT
				, DayOfWeekCalendar TINYINT
				, BusinessDayOfYearCalendar SMALLINT
				, BusinessDayOfQuarterCalendar TINYINT
				, BusinessDayOfMonthCalendar TINYINT
				, BusinessDayOfWeekCalendar TINYINT
			);
			
		ALTER TABLE PL.dw.dimDate1
		ADD CONSTRAINT PK_dwDimDate1 PRIMARY KEY ([Key]);
	END

INSERT PL.dw.dimDate1
	(
		ETLDateTime
		, [Key]
		, [Date]
		, IsWeekend
		, DayOfWeekLiteral
		, DayOfWeekShortLiteral
		, YearCalendar
		, QuarterCalendar
		, MonthCalendar
		, MonthLiteralCalendar
		, MonthLiteralShortCalendar
		, WeekOfYearCalendar
		, WeekOfQuarterCalendar
		, WeekOfMonthCalendar
		, DayOfYearCalendar
		, DayOfQuarterCalendar
		, DayOfMonthCalendar
		, DayOfWeekCalendar
		, BusinessDayOfYearCalendar
		, BusinessDayOfQuarterCalendar
		, BusinessDayOfMonthCalendar
		, BusinessDayOfWeekCalendar
	)
SELECT
	GETDATE()
	, [Key]
	, [Date]
	, IsWeekend
	, DayOfWeekLiteral
	, DayOfWeekShortLiteral
	, YearCalendar
	, QuarterCalendar
	, MonthCalendar
	, MonthLiteralCalendar
	, MonthLiteralShortCalendar
	, WeekOfYearCalendar
	, WeekOfQuarterCalendar
	, WeekOfMonthCalendar
	, DayOfYearCalendar
	, DayOfQuarterCalendar
	, DayOfMonthCalendar
	, DayOfWeekCalendar
	, BusinessDayOfYearCalendar
	, BusinessDayOfQuarterCalendar
	, BusinessDayOfMonthCalendar
	, BusinessDayOfWeekCalendar
FROM PL.etl.dimDate1;
--** Insert into dw.dimDate1 - End **--

--** Clean up - Start **--
IF (OBJECT_ID('PL.etl.dimDate1', 'U') IS NOT NULL)
	BEGIN TRUNCATE TABLE PL.etl.dimDate1; END
--** Clean up - End **--

-- Log --> End
INSERT PL.etl.[Log] ([DateTime], ETLSource, [Action], ExecutionID, ExecutedBy)
SELECT GETDATE(), @@PROCID, 'End', @ExecutionID, SUSER_SNAME();

END -- etl.dimDate1
GO

Test the stored procedure.
EXEC etl.spDimDate1 @FirstDayOfWeek = 'Monday';
SELECT * FROM PL.dw.dimDate1;
GO

EXEC etl.spDimDate1 @FirstDayOfWeek = 'Tuesday';
SELECT * FROM PL.dw.dimDate1;
GO

EXEC etl.spDimDate1 @FirstDayOfWeek = 'Wednesday';
SELECT * FROM PL.dw.dimDate1;
GO

EXEC etl.spDimDate1 @FirstDayOfWeek = 'Thursday';
SELECT * FROM PL.dw.dimDate1;
GO

EXEC etl.spDimDate1 @FirstDayOfWeek = 'Friday';
SELECT * FROM PL.dw.dimDate1;
GO

EXEC etl.spDimDate1 @FirstDayOfWeek = 'Saturday';
SELECT * FROM PL.dw.dimDate1;
GO

EXEC etl.spDimDate1 @FirstDayOfWeek = 'Sunday';
SELECT * FROM PL.dw.dimDate1;
GO

Check the Log.
SELECT *
FROM PL.etl.[Log]
ORDER BY [DateTime];
GO

Log

I ran Mon twice to cache a plan and again in the end to run the next reports for @FirstDayOfWeek = ‘Monday’.
The max execution time is 1 second.

Quick reporting, based on Date Dimension.

-- Business Days In Year
SELECT
	YearCalendar
	, MAX(BusinessDayOfYearCalendar) AS BusinessDaysInYear
FROM dw.dimDate1
GROUP BY YearCalendar
ORDER BY YearCalendar;
GO

Business Days In Year
-- Business Days In Quarter
SELECT
	YearCalendar, QuarterCalendar
	, MAX(BusinessDayOfQuarterCalendar) AS BusinessDaysInQuarter
FROM dw.dimDate1
GROUP BY YearCalendar, QuarterCalendar
ORDER BY YearCalendar, QuarterCalendar;
GO

Business Days In Quarter
-- Business Days In Month
SELECT
	YearCalendar, MonthCalendar
	, MAX(BusinessDayOfMonthCalendar) AS BusinessDaysInMonth
FROM dw.dimDate1
GROUP BY YearCalendar, MonthCalendar
ORDER BY YearCalendar, MonthCalendar;
GO

Business Days In Month

Preview –> dimDate (1).xlsx

Delete database.

USE [master];
GO

IF (DB_ID('PL') IS NOT NULL)
	BEGIN
		ALTER DATABASE PL SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
		DROP DATABASE PL;
	END
GO

Keep it simple :-)