T-SQL: NVARCHAR(255) vs. NVARCHAR(MAX) Disk Space Comparison


In some scenarios like ETL or load data into a DB, we need to insert the data into a staging table, where all the columns are String data type and on the next step to cleanse and manipulate the data. I heard a discussion about the data type of the columns in the staging table – NVARCHAR(255) or NVARCHAR(MAX). Some of the arguments were:

  • Statement 1NVARCHAR(MAX) will return error message if we have more than 20 columns
  • Statement 2 NVARCHAR(MAX) takes much more disk space

I decided to have a little fun and reproduce the scenario when we insert random data in both tables – NVARCHAR(255) and NVARCHAR(MAX)

The steps are:

  • Create databases – DB_255 and DB_MAX
  • Create tables with 100 columns in each DB – Table_255 and Table_MAX
  • Populate the tables with 1000 random rows
  • Duplicate the rows until we have 32,000 rows in each table
  • Check the size of the .mdf for each DB

Create databases

USE [master];
GO
 
IF (DB_ID('DB_255') IS NOT NULL)
	BEGIN
		ALTER DATABASE DB_255 SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
		DROP DATABASE DB_255;
	END
GO
 
CREATE DATABASE DB_255;
GO

IF (DB_ID('DB_MAX') IS NOT NULL)
	BEGIN
		ALTER DATABASE DB_MAX SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
		DROP DATABASE DB_MAX;
	END
GO
 
CREATE DATABASE DB_MAX;
GO

Create test objects
-- 255
USE DB_255;
GO

-- Table_255
IF (OBJECT_ID('dbo.Table_255', 'U') IS NOT NULL)
	BEGIN DROP TABLE dbo.Table_255; END
GO

DECLARE
	@Counter TINYINT = 1
	, @DynamicSQL VARCHAR(MAX);

SET @DynamicSQL = '
CREATE TABLE dbo.Table_255
	(';

WHILE (@Counter <= 100)
	BEGIN
		SET @DynamicSQL += '
		Col_' + CAST(@Counter AS VARCHAR(3)) + ' NVARCHAR(255),';

		SET @Counter += 1;
	END

SET @DynamicSQL = REVERSE(SUBSTRING(REVERSE(@DynamicSQL), 2, (LEN(@DynamicSQL) - 1)))

SET @DynamicSQL += '
	);
';

EXEC (@DynamicSQL);
GO

-- ViewRandHelper_255
IF (OBJECT_ID('dbo.ViewRandHelper_255', 'V') IS NOT NULL)
	BEGIN DROP VIEW dbo.ViewRandHelper_255; END
GO

CREATE VIEW dbo.ViewRandHelper_255 AS
SELECT RAND() AS R;
GO

-- FunctionRandonString_255
IF (OBJECT_ID('dbo.FunctionRandonString_255', 'FN') IS NOT NULL)
	BEGIN DROP FUNCTION dbo.FunctionRandonString_255; END
GO

CREATE FUNCTION dbo.FunctionRandonString_255(@StringLength AS INT)
RETURNS VARCHAR(MAX) AS
BEGIN
	DECLARE
		@String VARCHAR(100)
		, @Characters VARCHAR(100)
		, @Counter INT;

	SELECT
		@Characters = ''
		, @Counter = 48;

	WHILE @Counter <= 122
	BEGIN
		SET @Characters = @Characters + CAST(CHAR(@Counter) AS CHAR(1));
		SET @Counter += 1;
	END

	SELECT
		@Counter = 0
		, @String = '';

	WHILE (@Counter < @StringLength)
	BEGIN
		SET @String = @String + SUBSTRING(@Characters, CAST(CEILING((SELECT R FROM ViewRandHelper_255) * LEN(@Characters)) AS INT), 1);
		SET @Counter += 1;
	END

		RETURN @String
END
GO

-- MAX
USE DB_MAX;
GO

-- Table_MAX
IF (OBJECT_ID('dbo.Table_MAX', 'U') IS NOT NULL)
	BEGIN DROP TABLE dbo.Table_MAX; END
GO

DECLARE
	@Counter TINYINT = 1
	, @DynamicSQL VARCHAR(MAX);

SET @DynamicSQL = '
CREATE TABLE dbo.Table_MAX
	(';

WHILE (@Counter <= 100)
	BEGIN
		SET @DynamicSQL += '
		Col_' + CAST(@Counter AS VARCHAR(3)) + ' NVARCHAR(MAX),';

		SET @Counter += 1;
	END

SET @DynamicSQL = REVERSE(SUBSTRING(REVERSE(@DynamicSQL), 2, (LEN(@DynamicSQL) - 1)))

SET @DynamicSQL += '
	);
';

EXEC (@DynamicSQL);
GO

-- ViewRandHelper_MAX
IF (OBJECT_ID('dbo.ViewRandHelper_MAX', 'V') IS NOT NULL)
	BEGIN DROP VIEW dbo.ViewRandHelper_MAX; END
GO

CREATE VIEW dbo.ViewRandHelper_MAX AS
SELECT RAND() AS R;
GO

-- FunctionRandonString_MAX
IF (OBJECT_ID('dbo.FunctionRandonString_MAX', 'FN') IS NOT NULL)
	BEGIN DROP FUNCTION dbo.FunctionRandonString_MAX; END
GO

CREATE FUNCTION dbo.FunctionRandonString_MAX(@StringLength AS INT)
RETURNS VARCHAR(MAX) AS
BEGIN
	DECLARE
		@String VARCHAR(100)
		, @Characters VARCHAR(100)
		, @Counter INT;

	SELECT
		@Characters = ''
		, @Counter = 48;

	WHILE @Counter <= 122
	BEGIN
		SET @Characters = @Characters + CAST(CHAR(@Counter) AS CHAR(1));
		SET @Counter += 1;
	END

	SELECT
		@Counter = 0
		, @String = '';

	WHILE (@Counter < @StringLength)
	BEGIN
		SET @String = @String + SUBSTRING(@Characters, CAST(CEILING((SELECT R FROM ViewRandHelper_MAX) * LEN(@Characters)) AS INT), 1);
		SET @Counter += 1;
	END

		RETURN @String
END
GO

Check test objects
-- 255
USE DB_255;
GO

SELECT
	'Top 2' AS RowType_255
	, T1.ColumnName AS ColumnName_255
	, T1.ColumnID AS ColimnID_255
	, T1.TypeName AS TypeName_255
	, T1.TypeMaxLength AS TypeMaxLength_255
FROM
	(
		SELECT TOP 2
			C.[name] AS ColumnName
			, C.[column_id] AS ColumnID
			, T.[name] AS TypeName
			, C.[max_length] AS TypeMaxLength
		FROM
			sys.columns AS C
			JOIN sys.types AS T
				ON C.user_type_id = T.user_type_id
		WHERE OBJECT_NAME(C.[object_id]) = 'Table_255'
		ORDER BY C.[column_id]
	) AS T1

UNION ALL

SELECT
	'Bottom 2'
	, T2.ColumnName AS ColumnName_255
	, T2.ColumnID AS ColimnID_255
	, T2.TypeName AS TypeName_255
	, T2.TypeMaxLength AS TypeMaxLength_255
FROM
	(
		SELECT TOP 2
			C.[name] AS ColumnName
			, C.[column_id] AS ColumnID
			, T.[name] AS TypeName
			, C.[max_length] AS TypeMaxLength
		FROM
			sys.columns AS C
			JOIN sys.types AS T
				ON C.user_type_id = T.user_type_id
		WHERE OBJECT_NAME(C.[object_id]) = 'Table_255'
		ORDER BY C.[column_id] DESC
	) AS T2
ORDER BY ColumnID;

SELECT dbo.FunctionRandonString_255(212) As RandomString_255;
GO

-- MAX
USE DB_MAX;
GO

SELECT
	'Top 2' AS RowType_MAX
	, T1.ColumnName AS ColumnName_MAX
	, T1.ColumnID AS ColimnID_MAX
	, T1.TypeName AS TypeName_MAX
	, T1.TypeMaxLength AS TypeMaxLength_MAX
FROM
	(
		SELECT TOP 2
			C.[name] AS ColumnName
			, C.[column_id] AS ColumnID
			, T.[name] AS TypeName
			, C.[max_length] AS TypeMaxLength
		FROM
			sys.columns AS C
			JOIN sys.types AS T
				ON C.user_type_id = T.user_type_id
		WHERE OBJECT_NAME(C.[object_id]) = 'Table_MAX'
		ORDER BY C.[column_id]
	) AS T1

UNION ALL

SELECT
	'Bottom 2'
	, T2.ColumnName AS ColumnName_MAX
	, T2.ColumnID AS ColimnID_MAX
	, T2.TypeName AS TypeName_MAX
	, T2.TypeMaxLength AS TypeMaxLength_MAX
FROM
	(
		SELECT TOP 2
			C.[name] AS ColumnName
			, C.[column_id] AS ColumnID
			, T.[name] AS TypeName
			, C.[max_length] AS TypeMaxLength
		FROM
			sys.columns AS C
			JOIN sys.types AS T
				ON C.user_type_id = T.user_type_id
		WHERE OBJECT_NAME(C.[object_id]) = 'Table_MAX'
		ORDER BY C.[column_id] DESC
	) AS T2
ORDER BY ColumnID;

SELECT dbo.FunctionRandonString_MAX(212) As RandomString_MAX;
GO

Populate test tables

-- 255
USE DB_255;
GO

DECLARE
	@Counter TINYINT = 1
	, @DynamicSQL VARCHAR(MAX);

SET @DynamicSQL = '
INSERT Table_255
	(';

WHILE (@Counter <= 100)
	BEGIN
		SET @DynamicSQL += '
		Col_' + CAST(@Counter AS VARCHAR(3)) + ',';

		SET @Counter += 1;
	END

SET @DynamicSQL = REVERSE(SUBSTRING(REVERSE(@DynamicSQL), 2, (LEN(@DynamicSQL) - 1)))

SET @DynamicSQL += '
	)
SELECT TOP 1000
';

SELECT @Counter = 1;

WHILE (@Counter <= 100)
	BEGIN
		SET @DynamicSQL += '
		dbo.FunctionRandonString_255(212),';

		SET @Counter += 1;
	END

SET @DynamicSQL = REVERSE(SUBSTRING(REVERSE(@DynamicSQL), 2, (LEN(@DynamicSQL) - 1)))

SET @DynamicSQL += '
FROM
	sys.all_objects AS T1
	CROSS JOIN sys.all_objects AS T2
';

EXEC (@DynamicSQL);
GO

INSERT dbo.Table_255
SELECT *
FROM dbo.Table_255;
GO 5

-- MAX
USE DB_MAX;
GO

DECLARE
	@Counter TINYINT = 1
	, @DynamicSQL VARCHAR(MAX);

SET @DynamicSQL = '
INSERT Table_MAX
	(';

WHILE (@Counter <= 100)
	BEGIN
		SET @DynamicSQL += '
		Col_' + CAST(@Counter AS VARCHAR(3)) + ',';

		SET @Counter += 1;
	END

SET @DynamicSQL = REVERSE(SUBSTRING(REVERSE(@DynamicSQL), 2, (LEN(@DynamicSQL) - 1)))

SET @DynamicSQL += '
	)
SELECT TOP 1000
';

SELECT @Counter = 1;

WHILE (@Counter <= 100)
	BEGIN
		SET @DynamicSQL += '
		dbo.FunctionRandonString_MAX(212),';

		SET @Counter += 1;
	END

SET @DynamicSQL = REVERSE(SUBSTRING(REVERSE(@DynamicSQL), 2, (LEN(@DynamicSQL) - 1)))

SET @DynamicSQL += '
FROM
	sys.all_objects AS T1
	CROSS JOIN sys.all_objects AS T2
';

EXEC (@DynamicSQL);
GO

INSERT dbo.Table_MAX
SELECT *
FROM dbo.Table_MAX;
GO 5

Check test tables columns and rows count
-- 255
USE DB_255;
GO

SELECT TOP 2 *
FROM dbo.Table_255;
GO

SELECT COUNT(Col_1) AS Count_255
FROM dbo.Table_255;
GO

-- MAX
USE DB_MAX;
GO

SELECT TOP 2 *
FROM dbo.Table_MAX;
GO

SELECT COUNT(Col_1) As Count_MAX
FROM dbo.Table_MAX;
GO

Check test tables columns and rows count

Check file size

SELECT
	DB_NAME([database_id]) AS [Database Name]
	, [type_desc] AS [File Type]
	, [name] AS [File Name]
	, physical_name AS [File Path]
	, (size * 8) / 1024 [File Size (MB)]
FROM sys.master_files
WHERE DB_NAME([database_id]) IN ('DB_255', 'DB_MAX')
GO

Check file size

Delete databases

USE [master];
GO
 
IF (DB_ID('DB_255') IS NOT NULL)
	BEGIN
		ALTER DATABASE DB_255 SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
		DROP DATABASE DB_255;
	END
GO

IF (DB_ID('DB_MAX') IS NOT NULL)
	BEGIN
		ALTER DATABASE DB_MAX SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
		DROP DATABASE DB_MAX;
	END
GO

Conclusion:

  • Both statements failed
  • There is a hint in the random function – we can’t use RAND() inside a function. The runaround is to create a view that selects RAND() and use the view inside the function
  • You can use the function string as inspiration to create your own one

Keep it simple :-)