09 May 2006

Generic cross tab / pivot table query for MS SQL Server

Nano and I wrote this procedure nearly a year ago, but I needed it today so I've resurrected it, fixed a minor collation-related bug, and thought I'd release it to the public (via this blog and also here in my snippets library). It generates a 'cross-tab' (or 'pivot table') result set from (almost) any query, even a stored procedure, and optionally allows you to specify a column sort order.

The procedure makes extensive use dynamic SQL and hence 'sp_executesql'. Because the scope of temporary tables does not extend outside of an sp_executesql transaction, I've had to use global temporary tables, which means that this procedure will become a bottleneck if used too frequently by multiple sessions. That said, we've been using it in production for over a year now for an app that has relatively few users, and it works a treat.

This stored procedure is provided 'as is' and without any warranty - I've only tested it on a relatively small number of queries. If you want help to build it into a production app, please get in touch (email rich at semantise dot com).

CREATE PROCEDURE [dbo].[genericCrosstab]

-- @tableSpec
-- A comma delimited list of columns with datatypes (and null constraints, if required)
-- e.g. 'col1 INT, col2 VARCHAR(255) NULL'
-- N.B. You only need provide this list if the source of the data is a stored procedure
@tableSpec NVARCHAR(4000) = '',

-- @sqlSelect
-- The SELECT clause (or 'EXEC %stored_procedure%' statement) for the input data
-- e.g. 'SELECT a.au_id, a.au_fname, a.au_lname, a.phone, LEFT(t.title, 10) AS [title], LEFT(s.stor_name, 10) AS [store], sales.qty'
-- e.g. 'EXEC dbo.myStoredProcedure %param1%, %param2%'
@sqlSelect NVARCHAR(512),

-- @sqlFromWhere
-- The FROM and WHERE clauses for the input data (if a SELECT statement)
-- e.g. 'FROM authors a, titleauthor ta, titles t, stores s, sales WHERE a.au_id = ta.au_id AND ta.title_id = t.title_id AND t.title_id = sales.title_id AND s.stor_id = sales.stor_id'
-- N.B. Leave this blank if the source of the data is a stored procedure
@sqlFromWhere NVARCHAR(512) = '',

-- @pivotExpr
-- The column name or expression to use as the pivot (specifies which values are the column headings for the crosstabbed data)
-- e.g. 'store' or '''Store: '' + [store]'
@pivotExpr NVARCHAR(255),

-- @valueExpr
-- A column name or expression to use as the values in the crosstabbed data
-- e.g. 'qty'
@valueExpr NVARCHAR(512),

-- @function
-- The aggregation function to use to combine values in the crosstabbed data
-- e.g. 'MIN'
-- N.B. If this parameter is left blank, the procedure will count up occurences and create additional column headings as necessary, e.g. '[X], [Y], [Y (2)], [Z]' if there are two occurences for label 'Y'
@function NVARCHAR(20) = '',

-- @groupBy
-- The column list to group by (these columns will appear to the left of the crosstabbed data)
-- e.g. 'au_id, au_fname, au_lname, phone'
@groupBy NVARCHAR(512),

-- @sortLookup
-- A table or view which can be queried to provide a sort order for the columns in the crosstabbed data
-- e.g. 'days_of_the_week'
-- N.B. The specified table or view must have 'label' and 'sort_order' columns
-- N.B. If column order is irrelevant, leave this parameter blank
-- N.B. Feature: if @sortLookup is given as '*', the columns are placed in ascending alphabetical order
@sortLookup NVARCHAR(40) = '',

-- @collation
-- The collation sequence
-- e.g. 'Latin1_general_CI_AS'
@collation NVARCHAR(40) = 'Latin1_general_CI_AS',

-- @verbose
-- Set to 1 if you want debug messages
@verbose BIT = 0

AS
BEGIN

-- Check that the mandatory parameters are not empty strings
IF LEN(RTRIM(ISNULL(@sqlSelect, ''))) = 0
BEGIN
RAISERROR ('The @sqlSelect parameter cannot be an empty string.', 16, 1)
ROLLBACK TRANSACTION
END

IF LEN(RTRIM(ISNULL(@pivotExpr, ''))) = 0
BEGIN
RAISERROR ('The @pivotExpr parameter cannot be an empty string.', 16, 1)
ROLLBACK TRANSACTION
END

IF LEN(RTRIM(ISNULL(@valueExpr, ''))) = 0
BEGIN
RAISERROR ('The @valueExpr parameter cannot be an empty string.', 16, 1)
ROLLBACK TRANSACTION
END

IF LEN(RTRIM(ISNULL(@groupBy, ''))) = 0
BEGIN
RAISERROR ('The @groupBy parameter cannot be an empty string.', 16, 1)
ROLLBACK TRANSACTION
END

-- Local variables
DECLARE @i INTEGER
DECLARE @sql NVARCHAR(4000)
DECLARE @sqlX NVARCHAR(4000)
DECLARE @col NVARCHAR(4000)
DECLARE @pivot NVARCHAR(4000)
DECLARE @indx NVARCHAR(10)
DECLARE @cols NVARCHAR(4000)
DECLARE @where NVARCHAR(4000)
DECLARE @update NVARCHAR(4000)
DECLARE @value NVARCHAR(532)
DECLARE @select NVARCHAR(4000)

-- Drop the global temporary tables we will use (if they already exist)
-- N.B. we have to use global temp tables, as local temp tables are dropped at the end of a batch (so are not accessible outside of the sp_executesql context)
-- This syntax is a bit long winded because I couldn't make the substitution syntax for sp_executesql work (for this example only! - a bug in T-SQL?)
SELECT @sql = N'IF EXISTS (SELECT 1 FROM tempdb.dbo.sysobjects WHERE name = ''@table'' AND xtype = ''U'') DROP TABLE @table'
SELECT @sqlX = REPLACE(@sql, N'@table', N'##input')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##grpCount')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##colList')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##lookup')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##results')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##temp')
EXEC sp_executesql @sqlX

IF @verbose = 1 SELECT 'Global temporary tables dropped (if present).' AS [Message]

-- * The real work starts here

-- Store the input dataset
IF LEN(RTRIM(@tableSpec)) = 0
BEGIN
-- Create and populate the temporary table in one step
SELECT @sql = @sqlSelect + ' INTO ##input ' + @sqlFromWhere
IF @verbose = 1 SELECT @sql AS [Create and populate the temporary table in one step]
EXEC sp_executesql @sql
END
ELSE
BEGIN
-- Create the temporary table first (the only method that can be used if the data is coming from a stored proc)
SELECT @sql = 'CREATE TABLE ##input (' + @tableSpec + ')'
IF @verbose = 1 SELECT @sql AS [Create the temporary table]
EXEC sp_executesql @sql
-- Populate it
SELECT @sql = 'INSERT INTO ##input ' + @sqlSelect + ' ' + @sqlFromWhere
IF @verbose = 1 SELECT @sql AS [Populate the temporary table]
EXEC sp_executesql @sql
END
IF @verbose = 1 SELECT * FROM ##input

-- Does the query we want to cross-tab already have a column called 'crossTabRowID'? This is a reserved column name!
IF EXISTS(SELECT 1 FROM tempdb.dbo.sysobjects o, tempdb.dbo.syscolumns c WHERE o.id = c.id AND o.name = '##input' AND c.name = 'crossTabRowID')
BEGIN
RAISERROR ('The query passed to the [genericCrosstab] procedure uses a reserved column name (''crossTabRowID'').', 16, 1)
ROLLBACK TRANSACTION
END

-- Add an identity column (we need a 'row ID')
ALTER TABLE ##input ADD crossTabRowID NUMERIC(9, 0) IDENTITY NOT NULL

-- Generate the column list
SELECT @sql = 'SELECT ' + @groupBy + ' INTO ##colList FROM ##input WHERE 1 = 0'
IF @verbose = 1 SELECT @sql AS [Generate the column list]
EXEC sp_executesql @sql
IF @verbose = 1 SELECT * FROM ##colList

-- If no aggregation function is specified, we have to check first that there are no duplicates, and - if there are - we need to handle them
IF RTRIM(ISNULL(@function, '')) = ''
BEGIN
SELECT @sql = 'SELECT ' + @groupBy + ', ' + @pivotExpr + ' COLLATE ' + @collation + ' AS [pivot], COUNT(1) AS [count] INTO ##grpCount FROM ##input GROUP BY ' + @groupBy + ', ' + @pivotExpr + ' COLLATE ' + @collation
END
ELSE
BEGIN
SELECT @sql = 'SELECT ' + @groupBy + ', ' + @pivotExpr + ' COLLATE ' + @collation + ' AS [pivot], 1 AS [count] INTO ##grpCount FROM ##input GROUP BY ' + @groupBy + ', ' + @pivotExpr + ' COLLATE ' + @collation
END
IF @verbose = 1 SELECT @sql AS [Create the ##grpCount table]
EXEC sp_executesql @sql
IF @verbose = 1 SELECT * FROM ##grpCount

-- Create a temporary table that will act as a lookup (containing all of the non-pivot / non-group columns names)
SELECT [pivot], [count] AS [index], [pivot] AS [column_name] INTO ##lookup FROM ##grpCount WHERE 1 = 0

-- Build the results table; one row per group
SELECT @sql = 'SELECT ' + @groupBy + ' INTO ##results FROM ##grpCount GROUP BY ' + @groupBy
IF @verbose = 1 SELECT @sql AS [Create the ##results table]
EXEC sp_executesql @sql
IF @verbose = 1 SELECT * FROM ##results

-- Build the column list, taking into account duplicate occurences of pivotal values
DECLARE xcursor CURSOR FOR SELECT [pivot], MAX([count]) FROM ##grpCount GROUP BY [pivot]
OPEN xcursor

FETCH NEXT FROM xcursor INTO @pivot, @indx

WHILE @@FETCH_STATUS = 0
BEGIN

SELECT @i = 1

-- Loop over indx
WHILE @i <= @indx
BEGIN

-- Build the column list
SELECT @col = CASE @i WHEN 1 THEN @pivot ELSE @pivot + ' (' + CAST(@i AS VARCHAR(10)) + ')' END
INSERT INTO ##lookup VALUES (@pivot, @i, @col)
SELECT @col = '[' + @col + '] NVARCHAR(255) NULL'
SELECT @cols = ISNULL(@cols + ', ', '') + @col

-- Add the column to the results table
SELECT @sql = 'ALTER TABLE ##results ADD ' + @col
IF @verbose = 1 SELECT @sql AS [Add column to the ##results table]
EXEC sp_executesql @sql

-- Continue
SELECT @i = @i + 1

END

FETCH NEXT FROM xcursor INTO @pivot, @indx

END

CLOSE xcursor
DEALLOCATE xcursor

IF @verbose = 1 SELECT * FROM ##lookup
IF @verbose = 1 SELECT * FROM ##results

-- Loop over the column list (using the syscolumns table in the temp database) to build the WHERE clause
DECLARE xcursor CURSOR FOR SELECT DISTINCT c.[colid], c.[name] FROM tempdb.dbo.sysobjects o, tempdb.dbo.syscolumns c WHERE o.[id] = c.[id] AND o.[name] = '##colList' ORDER BY c.[colid]
OPEN xcursor

FETCH NEXT FROM xcursor INTO @indx, @col

WHILE @@FETCH_STATUS = 0
BEGIN

-- Build the WHERE clause
SELECT @where = ISNULL(@where + ' AND ', '') + 'ISNULL(t1.[' + @col + '], '''') = ISNULL(t3.[' + @col + '], '''')'

FETCH NEXT FROM xcursor INTO @indx, @col

END

CLOSE xcursor
DEALLOCATE xcursor

IF @verbose = 1 SELECT @where AS [WHERE clause]

-- Create a temp table that will help us build the UPDATE statements to set the values in the pivot table, and the SELECT statement (with columns in preferred order) afterwards
IF RTRIM(ISNULL(@sortLookup, '')) = ''
BEGIN

-- If no sorting table was specified, just select all columns
SELECT @select = '*'

-- Sort alphabetically
SELECT @sql = 'SELECT DISTINCT t5.[index], c.[name], t5.[pivot] INTO ##temp FROM tempdb.dbo.sysobjects o, tempdb.dbo.syscolumns c, ##lookup t5 WHERE o.[id] = c.[id] AND o.[name] = ''##results'' AND c.[name] COLLATE ' + @collation + ' = t5.[column_name] COLLATE ' + @collation + ' ORDER BY c.[name], t5.[index]'

END
ELSE
BEGIN

-- The 'group by' column(s) always come(s) first
SELECT @select = @groupBy

-- Sort in proscribed order
SELECT @sql = 'SELECT DISTINCT t5.[index], c.[name], t5.[pivot], l.[sort_order] INTO ##temp FROM tempdb.dbo.sysobjects o, tempdb.dbo.syscolumns c, ##lookup t5, ' + @sortLookup + ' l WHERE o.[id] = c.[id] AND o.[name] = ''##results'' AND t5.[pivot] COLLATE ' + @collation + ' *= l.[label] COLLATE ' + @collation + ' AND c.[name] COLLATE ' + @collation + ' = t5.[column_name] COLLATE ' + @collation + ' ORDER BY l.[sort_order], c.[name], t5.[index]'

END

IF @verbose = 1 SELECT @sql AS [Build the ##temp table]
EXEC sp_executesql @sql
IF @verbose = 1 SELECT * FROM ##temp

-- The value expression
SELECT @value = @valueExpr

-- If a function has been specified
IF RTRIM(ISNULL(@function, '')) != ''
BEGIN
SELECT @value = @function + '(' + @value + ')'
END

DECLARE xcursor CURSOR FOR SELECT [index], [name], [pivot] FROM ##temp
OPEN xcursor

FETCH NEXT FROM xcursor INTO @indx, @col, @pivot

WHILE @@FETCH_STATUS = 0
BEGIN

-- Build the SELECT expression
IF @select != '*' SELECT @select = @select + ', ' + @col

-- Create the SET clause of the UPDATE sql
IF RTRIM(ISNULL(@function, '')) = ''
BEGIN
-- No function specified
SELECT @update = '[' + @col + '] = (SELECT ' + @value + ' FROM ##input t1 WHERE ' + @where + ' AND ' + @pivotExpr + ' COLLATE ' + @collation + ' = ''' + @pivot + ''' COLLATE ' + @collation + ' AND t1.crossTabRowID = (SELECT MIN(t0.crossTabRowID) FROM ##input t0 WHERE ' + REPLACE(@where, 't3.', 't0.') + ' AND ' + @pivotExpr + ' COLLATE ' + @collation + ' = ''' + @pivot + '''' + ' COLLATE ' + @collation + ') + (' + CAST(@indx AS VARCHAR(3)) + ' - 1))'
END
ELSE
BEGIN
-- Function specified
SELECT @update = '[' + @col + '] = (SELECT ' + @value + ' FROM ##input t1 WHERE ' + @where + ' AND ' + @pivotExpr + ' COLLATE ' + @collation + ' = ''' + @pivot + ''' COLLATE ' + @collation + ' AND t1.crossTabRowID IN (SELECT t0.crossTabRowID FROM ##input t0 WHERE ' + REPLACE(@where, 't3.', 't0.') + ' AND ' + @pivotExpr + ' COLLATE ' + @collation + ' = ''' + @pivot + '''' + ' COLLATE ' + @collation + '))'
END
SELECT @sql = 'UPDATE ##results SET ' + @update + ' FROM ##results t3'
IF @verbose = 1 SELECT @sql AS [Create the SET clause of the UPDATE sql]
EXEC sp_executesql @sql

FETCH NEXT FROM xcursor INTO @indx, @col, @pivot

END

CLOSE xcursor
DEALLOCATE xcursor

-- Return the results
SELECT @sql = 'SELECT ' + @select + ' FROM ##results'
IF @verbose = 1 SELECT @sql AS [Create the SELECT statement that will return the results]
EXEC sp_executesql @sql

-- Tidy up: drop the global temporary tables
SELECT @sql = N'IF EXISTS (SELECT 1 FROM tempdb.dbo.sysobjects WHERE name = ''@table'' AND xtype = ''U'') DROP TABLE @table'
SELECT @sqlX = REPLACE(@sql, N'@table', N'##input')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##grpCount')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##colList')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##lookup')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##results')
EXEC sp_executesql @sqlX
SELECT @sqlX = REPLACE(@sql, N'@table', N'##temp')
EXEC sp_executesql @sqlX

END
GO

The following script creates two tables (one of test data, the other to specify a sort order) and runs the genericCrosstab procedure to generate a report.

IF EXISTS (SELECT *
FROM dbo.sysobjects
WHERE [id] = object_id(N'[dbo].[testTable]')
AND OBJECTPROPERTY(id, N'IsTable') = 1)
BEGIN
DROP TABLE [dbo].[testTable]
END
GO

CREATE TABLE testTable
([who] VARCHAR(10) NOT NULL,
[when] DATETIME NOT NULL,
[what] VARCHAR(255) NOT NULL,
[qty] INT NOT NULL)
GO

DELETE FROM testTable
GO

INSERT INTO testTable VALUES ('Fred', '2005-APR-01', 'Coding', 5)
INSERT INTO testTable VALUES ('Jo', '2005-APR-01', 'Coding', 1)
INSERT INTO testTable VALUES ('Jo', '2005-APR-01', 'Testing', 7)
INSERT INTO testTable VALUES ('Fred', '2005-APR-01', 'Coding', 3)
INSERT INTO testTable VALUES ('Tim', '2005-APR-01', 'Coding', 4)
INSERT INTO testTable VALUES ('Fred', '2005-APR-02', 'Coding', 8)
INSERT INTO testTable VALUES ('Jo', '2005-APR-02', 'Coding', 2)
INSERT INTO testTable VALUES ('Jo', '2005-APR-02', 'Testing', 6)
INSERT INTO testTable VALUES ('Tim', '2005-APR-02', 'Coding', 4)
GO

SELECT * FROM testTable
GO

EXEC [dbo].[genericCrosstab]
@sqlSelect = 'SELECT *',
@sqlFromWhere = 'FROM testTable',
@pivotExpr = 'what',
@valueExpr = 'qty',
@function = 'SUM',
@groupBy = 'who'
GO

-- Extending the example to proscribe column order...

IF EXISTS (SELECT *
FROM dbo.sysobjects
WHERE [id] = object_id(N'[dbo].[testSorter]')
AND OBJECTPROPERTY(id, N'IsTable') = 1)
BEGIN
DROP TABLE [dbo].[testSorter]
END
GO

CREATE TABLE testSorter
([label] VARCHAR(255) NOT NULL,
[sort_order] INT NOT NULL)
GO

DELETE FROM testSorter
GO

INSERT INTO testSorter VALUES ('Testing', 1)
INSERT INTO testSorter VALUES ('Coding', 2)
GO

SELECT * FROM testSorter
GO

EXEC [dbo].[genericCrosstab]
@sqlSelect = 'SELECT *',
@sqlFromWhere = 'FROM testTable',
@pivotExpr = 'what',
@valueExpr = 'qty',
@function = 'SUM',
@groupBy = 'who',
@sortLookup = 'testSorter',
@verbose = 0
GO

You can change @verbose to 1 to see what is going on.

Comments: Post a Comment

Subscribe to Post Comments [Atom]





<< Home

This page is powered by Blogger. Isn't yours?

Subscribe to Posts [Atom]