-- List all heap tables
-- Try this against tempdb too and see how many temp tables or table variables have been created there; with large OLAP databases you will be amazed.
-- A heap is recorded in sys.indexes as index_id 0 with type = 0,
-- so every table joined to such a row has no clustered index.
SELECT s.name + '.' + t.name AS TableName
FROM sys.indexes AS i
INNER JOIN sys.tables AS t
    ON t.object_id = i.object_id
INNER JOIN sys.schemas AS s
    ON s.schema_id = t.schema_id
WHERE i.type = 0 -- heap
ORDER BY TableName
Thursday, 27 December 2012
Database external fragmentation overview
USE MyDatabase
GO
-- Top 50 most externally fragmented indexes in the current database,
-- ranked by the estimated count of fragmented pages
-- (fragmentation percentage applied to the page count).
SELECT TOP 50
    s.avg_fragmentation_in_percent * s.page_count / 100.00 AS fragmented_page_count
    ,o.name AS TableName
    ,i.name AS IndexName
    ,s.partition_number
    ,s.avg_fragmentation_in_percent
    ,s.page_count
    ,s.index_depth
    ,s.index_level
    ,s.index_type_desc
    ,d.name AS DataBaseName
    ,s.*
FROM sys.dm_db_index_physical_stats(db_id(), null, null, null, 'limited') AS s
-- Restrict to one table instead:
--sys.dm_db_index_physical_stats(db_id(), object_id('MyTableInCurrentDB'), null, null, 'limited') s
INNER JOIN sys.databases AS d ON d.database_id = s.database_id
INNER JOIN sys.objects AS o ON o.object_id = s.object_id
LEFT JOIN sys.indexes AS i ON i.object_id = s.object_id AND i.index_id = s.index_id
WHERE s.avg_fragmentation_in_percent > 10 -- large external fragmentation only
  AND s.page_count > 1                    -- ignore tiny tables/indexes
  AND s.index_id > 0                      -- ignore heaps
ORDER BY s.avg_fragmentation_in_percent * s.page_count DESC
GO
-- NOTE(review): this query is a byte-for-byte duplicate of the one above
-- (the scrape repeated the post body); kept as-is.
select top 50
-- estimated number of fragmented pages (fragmentation % applied to page count)
avg_fragmentation_in_percent*s.page_count/100.00 fragmented_page_count
,o.name as TableName
,i.name as IndexName
,s.partition_number
,s.avg_fragmentation_in_percent
,s.page_count
,s.index_depth
,s.index_level
,s.index_type_desc
,d.name as DataBaseName
,s.*
from sys.dm_db_index_physical_stats(db_id(), null, null, null, 'limited') s
--sys.dm_db_index_physical_stats(db_id(), object_id('MyTableInCurrentDB'), null, null, 'limited') s
inner join sys.databases d on d.database_id=s.database_id
inner join sys.objects o on o.object_id=s.object_id
-- left join so rows survive even when no matching sys.indexes row exists
left join sys.indexes i on i.object_id=s.object_id and i.index_id=s.index_id
where -- have large external fragmentation
s.avg_fragmentation_in_percent>10
-- ignore small table/index
and s.page_count>1
-- ignore heap table
and s.index_id>0
order by avg_fragmentation_in_percent*s.page_count desc
Tuesday, 18 December 2012
Checking SQL server stats
-- use dm_db_stats_properties to get stats info
-- for SQL 2012 and above
-- Per-statistic metadata via sys.dm_db_stats_properties (SQL Server 2012+):
-- sampled rows, modification counter since the last update, and last-update time.
-- OUTER APPLY keeps sys.stats rows even when the DMF returns nothing.
SELECT
    OBJECT_NAME([sp].[object_id]) AS "Table"
    ,[sp].[stats_id] AS "Statistic ID"
    ,[s].[name] AS "Statistic"
    ,[sp].[rows_sampled]
    ,[sp].[modification_counter] AS "Modifications"
    ,[sp].[last_updated] AS "Last Updated"
    ,[sp].[rows]
    ,[sp].[unfiltered_rows]
FROM [sys].[stats] AS [s]
OUTER APPLY sys.dm_db_stats_properties([s].[object_id], [s].[stats_id]) AS [sp]
-- Uncomment to restrict to a single table:
--WHERE [s].[object_id] = OBJECT_ID(N'Sales.TestSalesOrderDetail');
ORDER BY 1, 2
--
-- quick way to check which stats objects are used by query
--
-- checking auto created stats which are never updated or updated before 1 week
-- Find auto-created statistics that have never been updated, or were last
-- updated more than @NumOfDays days ago (default: one week).
DECLARE @NumOfDays int, @StatsCheckDeadline datetime
SET @NumOfDays=7
-- Simplified from DATEADD(hour, -24*@NumOfDays, ...) to the equivalent day-based form.
SET @StatsCheckDeadline=DATEADD(day, -@NumOfDays, getdate())
--SELECT @StatsCheckDeadline
SELECT object_name(object_id) as TableName
,name AS StatisticsName
-- STATS_DATE returns NULL when the statistic has never been updated
,STATS_DATE(object_id, stats_id) AS StatisticsUpdateDate
,auto_created AS IsAutoCreated
FROM sys.stats
WHERE auto_created=1
-- never updated, or updated on/before the deadline
AND (STATS_DATE(object_id, stats_id) IS NULL
OR STATS_DATE(object_id, stats_id)<=@StatsCheckDeadline)
order by TableName, StatisticsName
-- after this check detail of each stat
-- Syntax reference, commented out because it is not executable as written:
--   DBCC SHOW_STATISTICS ( table_or_indexed_view_name , target )
--   [ WITH [ NO_INFOMSGS ] < option > [ , n ] ]
--   < option > :: = STAT_HEADER | DENSITY_VECTOR | HISTOGRAM | STATS_STREAM
-- Examples (fixed placeholder typo: MyPrimayKey -> MyPrimaryKey):
DBCC SHOW_STATISTICS ("MyTable", MyPrimaryKey);
DBCC SHOW_STATISTICS ("MyTable", MyStatsName);
reference:
1)
http://sqlblog.com/blogs/paul_white/archive/2011/09/21/how-to-find-the-statistics-used-to-compile-an-execution-plan.aspx
2)
http://www.sqlskills.com/blogs/erin/understanding-when-statistics-will-automatically-update/
-- for SQL 2012 and above
-- NOTE(review): duplicate of the dm_db_stats_properties query earlier in this
-- file (repeated blog content); kept as-is.
SELECT
OBJECT_NAME([sp].[object_id]) AS "Table"
,[sp].[stats_id] AS "Statistic ID"
,[s].[name] AS "Statistic"
,[sp].[rows_sampled]
,[sp].[modification_counter] AS "Modifications"
,[sp].[last_updated] AS "Last Updated"
,[sp].[rows]
,[sp].[unfiltered_rows]
FROM [sys].[stats] AS [s]
-- OUTER APPLY keeps every sys.stats row even if the DMF returns nothing
OUTER APPLY sys.dm_db_stats_properties ([s].[object_id],[s].[stats_id]) AS [sp]
--WHERE [s].[object_id] = OBJECT_ID(N'Sales.TestSalesOrderDetail');
ORDER BY 1,2
--
-- quick way to check which stats objects are used by query
-- WARNING: clears the ENTIRE plan cache server-wide; do not run on production.
-- It is used here only to force a fresh compilation of the query below.
DBCC FREEPROCCACHE
-- Sample AdventureWorks query compiled with trace flags that expose which
-- statistics the optimizer considers/uses (see Paul White's article in the
-- references below):
--   3604 - redirect trace output to the client
--   9292 - report statistics objects the optimizer finds "interesting"
--   9204 - report statistics actually loaded and used
-- NOTE(review): these are undocumented trace flags; behavior may vary by
-- SQL Server version -- confirm on your build.
SELECT
p.Name,
total_quantity = SUM(th.Quantity)
FROM AdventureWorks.Production.Product AS p
JOIN AdventureWorks.Production.TransactionHistory AS th ON
th.ProductID = p.ProductID
WHERE
th.ActualCost >= $5.00
AND p.Color = N'Red'
GROUP BY
p.Name
ORDER BY
p.Name
OPTION
(
QUERYTRACEON 3604,
QUERYTRACEON 9292,
QUERYTRACEON 9204
)
--
-- checking auto created stats which are never updated or updated before 1 week
-- NOTE(review): duplicate of the "auto created stats" check earlier in this
-- file; kept as-is. Finds auto-created statistics that were never updated, or
-- were last updated more than @NumOfDays days ago.
DECLARE @NumOfDays int, @StateCheckDateTimeDeadLine datetime
SET @NumOfDays=7
-- -24*@NumOfDays hours == @NumOfDays days back from now
SET @StateCheckDateTimeDeadLine=DATEADD(hour, -24*@NumOfDays, getdate())
--SELECT @StateCheckDateTimeDeadLine
SELECT object_name(object_id) as TableName
,name AS StatisticsName
-- STATS_DATE returns NULL when the statistic has never been updated
,STATS_DATE(object_id, stats_id) AS StatisticsUpdatDate
,auto_created AS IsAutoCreated
FROM sys.stats
WHERE auto_created=1
-- never updated or updated before dead line
AND (STATS_DATE(object_id, stats_id) IS NULL
OR STATS_DATE(object_id, stats_id)<=@StateCheckDateTimeDeadLine)
order by 1,2
-- after this check detail of each stat
-- Syntax reference, commented out because it is not executable as written:
--   DBCC SHOW_STATISTICS ( table_or_indexed_view_name , target )
--   [ WITH [ NO_INFOMSGS ] < option > [ , n ] ]
--   < option > :: = STAT_HEADER | DENSITY_VECTOR | HISTOGRAM | STATS_STREAM
-- Examples (fixed placeholder typo: MyPrimayKey -> MyPrimaryKey):
DBCC SHOW_STATISTICS ("MyTable", MyPrimaryKey);
DBCC SHOW_STATISTICS ("MyTable", MyStatsName);
reference:
1)
http://sqlblog.com/blogs/paul_white/archive/2011/09/21/how-to-find-the-statistics-used-to-compile-an-execution-plan.aspx
2)
http://www.sqlskills.com/blogs/erin/understanding-when-statistics-will-automatically-update/
Thursday, 13 December 2012
Sizing database/partition/partitioned table with [dm_db_partition_stats]
--
-- whole database size
-- Whole-database totals from sys.dm_db_partition_stats.
-- Pages are 8192 bytes; the 1,024,000 divisor yields an approximate MB figure
-- (an exact binary MB would divide by 1,048,576).
SELECT
    SUM(reserved_page_count) * 8192 / 1024000 AS [TotalSpace(MB)]
    ,SUM([used_page_count]) * 8192 / 1024000 AS [UsedSpace(MB)]
    ,SUM([row_count]) AS [RowCount]
FROM sys.dm_db_partition_stats
--
-- size of each partition scheme
-- Size of each partition scheme.
-- BUG FIX: the original divided by 10,240,000 (an extra zero), understating
-- sizes tenfold; 1,024,000 matches the whole-database query above
-- (8192 bytes/page, approximate MB).
select s.name as [partition_scheme]
,sum(ps.[reserved_page_count])*8192/1024000 AS [TotalSpace(MB)]
,sum(ps.[used_page_count])*8192/1024000 AS [UsedSpace(MB)]
,sum([row_count]) AS [RowCount]
from sys.indexes i inner join sys.partition_schemes s
on i.data_space_id = s.data_space_id
inner join
(
-- pre-aggregate partition stats per (object, index) so each index joins once
SELECT
[object_id]
,[index_id]
,sum([used_page_count]) as [used_page_count]
,sum([reserved_page_count]) as [reserved_page_count]
,sum([row_count]) as [row_count]
FROM [sys].[dm_db_partition_stats]
group by [object_id],[index_id]
) ps
on ps.object_id=i.object_id and ps.index_id=i.index_id
group by s.name
-- order by names rather than positions
order by [partition_scheme], [TotalSpace(MB)]
--
-- size of partitioned table
-- Size of every partitioned table/index, with its partition scheme and function.
-- FIX: misspelled output alias [patition_function] -> [partition_function].
select
object_schema_name(i.object_id) as [schema]
,object_name(i.object_id) as [object]
,i.name as [index]
,ps.[used_page_count]
,ps.[reserved_page_count]
,ps.[row_count]
,s.name as [partition_scheme]
,f.name as [partition_function]
from sys.indexes i inner join sys.partition_schemes s
on i.data_space_id = s.data_space_id
inner join sys.partition_functions f
on f.function_id = s.function_id
inner join
(
-- pre-aggregate partition stats per (object, index) across all partitions
SELECT
[object_id]
,[index_id]
,sum([used_page_count]) as [used_page_count]
,sum([reserved_page_count]) as [reserved_page_count]
,sum([row_count]) as [row_count]
FROM [sys].[dm_db_partition_stats]
group by [object_id],[index_id]
) ps
on ps.object_id=i.object_id and ps.index_id=i.index_id
-- order by names rather than positions
order by [schema], [object], [index]
-- whole database size
-- NOTE(review): duplicate of the whole-database size query earlier in this file.
SELECT
-- 8192 bytes/page; dividing by 1,024,000 gives an approximate MB figure
(SUM(reserved_page_count)*8192)/1024000 AS [TotalSpace(MB)]
,(SUM([used_page_count])*8192)/1024000 AS [UsedSpace(MB)]
,(SUM([row_count])) AS [RowCount]
FROM sys.dm_db_partition_stats
--
-- size of each partition scheme
-- Size of each partition scheme (duplicate of the query earlier in the file).
-- BUG FIX: the original divided by 10,240,000 (an extra zero), understating
-- sizes tenfold; 1,024,000 matches the whole-database query above.
select s.name as [partition_scheme]
,sum(ps.[reserved_page_count])*8192/1024000 AS [TotalSpace(MB)]
,sum(ps.[used_page_count])*8192/1024000 AS [UsedSpace(MB)]
,sum([row_count]) AS [RowCount]
from sys.indexes i inner join sys.partition_schemes s
on i.data_space_id = s.data_space_id
inner join
(
-- pre-aggregate partition stats per (object, index) so each index joins once
SELECT
[object_id]
,[index_id]
,sum([used_page_count]) as [used_page_count]
,sum([reserved_page_count]) as [reserved_page_count]
,sum([row_count]) as [row_count]
FROM [sys].[dm_db_partition_stats]
group by [object_id],[index_id]
) ps
on ps.object_id=i.object_id and ps.index_id=i.index_id
group by s.name
-- order by names rather than positions
order by [partition_scheme], [TotalSpace(MB)]
--
-- size of partitioned table
-- Size of every partitioned table/index (duplicate of the query earlier in
-- the file). FIX: misspelled alias [patition_function] -> [partition_function].
select
object_schema_name(i.object_id) as [schema]
,object_name(i.object_id) as [object]
,i.name as [index]
,ps.[used_page_count]
,ps.[reserved_page_count]
,ps.[row_count]
,s.name as [partition_scheme]
,f.name as [partition_function]
from sys.indexes i inner join sys.partition_schemes s
on i.data_space_id = s.data_space_id
inner join sys.partition_functions f
on f.function_id = s.function_id
inner join
(
-- pre-aggregate partition stats per (object, index) across all partitions
SELECT
[object_id]
,[index_id]
,sum([used_page_count]) as [used_page_count]
,sum([reserved_page_count]) as [reserved_page_count]
,sum([row_count]) as [row_count]
FROM [sys].[dm_db_partition_stats]
group by [object_id],[index_id]
) ps
on ps.object_id=i.object_id and ps.index_id=i.index_id
-- order by names rather than positions
order by [schema], [object], [index]
Saturday, 8 December 2012
Inmon vs Kimball – Architecture Comparison and Data Vault Architecture
1) A good slide show for reference:
http://www.slideshare.net/kgraziano/introduction-to-data-vault-modeling#btnNext
Inmon states that the data warehouse is:
- Subject-oriented
- The data in the data warehouse is organized so that all the data elements relating to the same real-world event or object are linked together.
- Non-volatile
- Data in the data warehouse are never over-written or deleted — once committed, the data are static, read-only, and retained for future reporting.
- Integrated
- The data warehouse contains data from most or all of an organization's operational systems and these data are made consistent.
- Time-variant
- For an operational system, the stored data contains only the current value; the data warehouse, by contrast, retains history so that changes over time can be tracked.
Kimball: DW is where we publish used data.
Data Vault by Dan Linstedt:
Hub: Business Key
Link: represents a 3NF many-to-many relationship between business keys
Satellite: descriptive attributes for a business key
2)
From:
http://oracledwbi.wordpress.com/2010/03/25/inmon-vs-kimball-architecture-comparison/
Inmon – Hub & Spoke | Kimball – Bus Architecture | |
Methodology & Architecture | ||
Overall approach | Top-down | Bottom-up |
Architectural structure | Enterprise wide (atomic) data warehouse “feeds” departmental databases | Data marts model a single business process; enterprise consistency achieved through data bus and conformed dimensions |
Complexity of the method | Quite complex | Fairly simple |
Comparison with established development methodologies | Derived from the spiral methodology | Four-step process; a departure from RDBMS methods |
Discussion of physical design | Fairly thorough | Fairly light |
Scalability | Growing scope and changing requirements are critical | Need to adapt to highly volatile needs within a limited scope |
Application | Strategic solution | Tactical solution |
Information interdependence | Supported | Supported |
Deployment cost | Higher start-up costs, with lower subsequent project development costs | Lower start-up costs, with each subsequent project costing about the same |
Time to deploy | Longer start-up time | Quick start-up time |
Development skills required | Larger team(s) of specialists | Small teams of generalists |
Persistency of data | High rate of change from source systems | Source systems are relatively stable |
Data modelling | ||
Data orientation | Subject or data-driven | Process oriented |
Tools | Traditional (ERDs, DISs) | Dimensional modelling |
End-user accessibility | Low | High |
Philosophy | ||
Primary audience | IT professionals | End users |
Place in the organisation | Corporate wide facility | Growth subject to departmental demand |
Objective | Deliver a sound technical solution based on proven database methods and technologies | Deliver a solution that makes it easy for end users to directly query the data and still get reasonable response times |
Subscribe to:
Posts (Atom)